diff --git a/Cargo.toml b/Cargo.toml index 177ab3db31..80d8b69038 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.60.0" +version = "0.61.0" authors = ["Apache DataFusion "] homepage = "https://github.com/apache/datafusion-sqlparser-rs" documentation = "https://docs.rs/sqlparser/" @@ -42,6 +42,7 @@ std = [] recursive-protection = ["std", "recursive"] # Enable JSON output in the `cli` example: json_example = ["serde_json", "serde"] +derive-dialect = ["sqlparser_derive"] visitor = ["sqlparser_derive"] [dependencies] @@ -54,13 +55,17 @@ serde = { version = "1.0", default-features = false, features = ["derive", "allo # of dev-dependencies because of # https://github.com/rust-lang/cargo/issues/1596 serde_json = { version = "1.0", optional = true } -sqlparser_derive = { version = "0.4.0", path = "derive", optional = true } +sqlparser_derive = { version = "0.5.0", path = "derive", optional = true } [dev-dependencies] simple_logger = "5.0" matches = "0.1" pretty_assertions = "1" +[[test]] +name = "sqlparser_derive_dialect" +required-features = ["derive-dialect"] + [package.metadata.docs.rs] # Document these features on docs.rs -features = ["serde", "visitor"] +features = ["serde", "visitor", "derive-dialect"] diff --git a/README.md b/README.md index 9dfe508103..775d074915 100644 --- a/README.md +++ b/README.md @@ -159,9 +159,9 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users -This parser is currently being used by the [DataFusion] query engine, [LocustDB], -[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], [ParadeDB], [CipherStash Proxy], -and [GreptimeDB]. +This parser is currently being used by the [DataFusion] query engine, +[LocustDB], [Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], +[JumpWire], [ParadeDB], [CipherStash Proxy], [Readyset] and [GreptimeDB]. 
If your project is using sqlparser-rs feel free to make a PR to add it to this list. @@ -282,3 +282,4 @@ licensed as above, without any additional terms or conditions. [`GenericDialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/struct.GenericDialect.html [CipherStash Proxy]: https://github.com/cipherstash/proxy [GreptimeDB]: https://github.com/GreptimeTeam/greptimedb +[Readyset]: https://github.com/readysettech/readyset diff --git a/changelog/0.61.0.md b/changelog/0.61.0.md new file mode 100644 index 0000000000..86a9701d53 --- /dev/null +++ b/changelog/0.61.0.md @@ -0,0 +1,127 @@ + + +# sqlparser-rs 0.61.0 Changelog + +This release consists of 66 commits from 22 contributors. See credits at the end of this changelog for more information. + +**Performance related:** + +- perf: remove unnecessary string clone in maybe_concat_string_literal [#2173](https://github.com/apache/datafusion-sqlparser-rs/pull/2173) (andygrove) +- perf: optimize `make_word()` to avoid unnecessary allocations [#2176](https://github.com/apache/datafusion-sqlparser-rs/pull/2176) (andygrove) + +**Fixed bugs:** + +- fix: Set the current folder as a "primary" for the `find` command [#2120](https://github.com/apache/datafusion-sqlparser-rs/pull/2120) (martin-g) +- fix: qualified column names with SQL keywords parse as identifiers [#2157](https://github.com/apache/datafusion-sqlparser-rs/pull/2157) (bombsimon) + +**Other:** + +- Fixing location for external tables [#2108](https://github.com/apache/datafusion-sqlparser-rs/pull/2108) (romanoff) +- Added support for `ALTER OPERATOR` syntax [#2114](https://github.com/apache/datafusion-sqlparser-rs/pull/2114) (LucaCappelletti94) +- Oracle: Support for MERGE predicates [#2101](https://github.com/apache/datafusion-sqlparser-rs/pull/2101) (xitep) +- [Oracle] Lower StringConcat precedence [#2115](https://github.com/apache/datafusion-sqlparser-rs/pull/2115) (xitep) +- Added alter external table support for snowflake 
[#2122](https://github.com/apache/datafusion-sqlparser-rs/pull/2122) (romanoff) +- MSSQL: Add support for parenthesized stored procedure name in EXEC [#2126](https://github.com/apache/datafusion-sqlparser-rs/pull/2126) (yoavcloud) +- MSSQL: Parse IF/ELSE without semicolon delimiters [#2128](https://github.com/apache/datafusion-sqlparser-rs/pull/2128) (yoavcloud) +- Extract source comments [#2107](https://github.com/apache/datafusion-sqlparser-rs/pull/2107) (xitep) +- PostgreSQL: Support schema-qualified operator classes in CREATE INDEX [#2131](https://github.com/apache/datafusion-sqlparser-rs/pull/2131) (dddenis) +- Oracle: Support for quote delimited strings [#2130](https://github.com/apache/datafusion-sqlparser-rs/pull/2130) (xitep) +- Added support for `ALTER OPERATOR FAMILY` syntax [#2125](https://github.com/apache/datafusion-sqlparser-rs/pull/2125) (LucaCappelletti94) +- PostgreSQL Tokenization: Fix unexpected characters after question mark being silently ignored [#2129](https://github.com/apache/datafusion-sqlparser-rs/pull/2129) (jnlt3) +- Support parsing parenthesized wildcard `(*)` [#2123](https://github.com/apache/datafusion-sqlparser-rs/pull/2123) (romanoff) +- Make benchmark statement valid [#2139](https://github.com/apache/datafusion-sqlparser-rs/pull/2139) (xitep) +- Fix parse_identifiers not taking semicolons into account [#2137](https://github.com/apache/datafusion-sqlparser-rs/pull/2137) (jnlt3) +- Add PostgreSQL PARTITION OF syntax support [#2127](https://github.com/apache/datafusion-sqlparser-rs/pull/2127) (fmguerreiro) +- Databricks: Support Timetravel With "TIMESTAMP AS OF" [#2134](https://github.com/apache/datafusion-sqlparser-rs/pull/2134) (JamesVorder) +- MySQL: Parse bitwise shift left/right operators [#2152](https://github.com/apache/datafusion-sqlparser-rs/pull/2152) (mvzink) +- Redshift: Add support for optional JSON format in copy option [#2141](https://github.com/apache/datafusion-sqlparser-rs/pull/2141) (yoavcloud) +- MySQL: Add 
missing support for TREE explain format [#2145](https://github.com/apache/datafusion-sqlparser-rs/pull/2145) (yoavcloud) +- MySQL: Add support for && as boolean AND [#2144](https://github.com/apache/datafusion-sqlparser-rs/pull/2144) (yoavcloud) +- PostgreSQL: ALTER USER password option [#2142](https://github.com/apache/datafusion-sqlparser-rs/pull/2142) (yoavcloud) +- Key Value Options: add support for trailing semicolon [#2140](https://github.com/apache/datafusion-sqlparser-rs/pull/2140) (yoavcloud) +- Added support for `ALTER OPERATOR CLASS` syntax [#2135](https://github.com/apache/datafusion-sqlparser-rs/pull/2135) (LucaCappelletti94) +- Added missing `Copy` derives [#2158](https://github.com/apache/datafusion-sqlparser-rs/pull/2158) (LucaCappelletti94) +- Tokenize empty line comments correctly [#2161](https://github.com/apache/datafusion-sqlparser-rs/pull/2161) (zyuiop) +- Add support for DuckDB `LAMBDA` keyword syntax [#2149](https://github.com/apache/datafusion-sqlparser-rs/pull/2149) (lovasoa) +- MySQL: Add support for casting using the BINARY keyword [#2146](https://github.com/apache/datafusion-sqlparser-rs/pull/2146) (yoavcloud) +- Added missing `From` impls for `Statement` variants [#2160](https://github.com/apache/datafusion-sqlparser-rs/pull/2160) (LucaCappelletti94) +- GenericDialect: support colon operator for JsonAccess [#2124](https://github.com/apache/datafusion-sqlparser-rs/pull/2124) (Samyak2) +- Databricks: Support Timetravel With "VERSION AS OF" [#2155](https://github.com/apache/datafusion-sqlparser-rs/pull/2155) (JamesVorder) +- Fixed truncate table if exists for snowflake [#2166](https://github.com/apache/datafusion-sqlparser-rs/pull/2166) (romanoff) +- Refactor: replace some `dialect_of!` checks with `Dialect` trait methods [#2171](https://github.com/apache/datafusion-sqlparser-rs/pull/2171) (andygrove) +- MySQL: Support `CAST(... AS ... 
ARRAY)` syntax [#2151](https://github.com/apache/datafusion-sqlparser-rs/pull/2151) (mvzink) +- Snowflake: Support SAMPLE clause on subqueries [#2164](https://github.com/apache/datafusion-sqlparser-rs/pull/2164) (finchxxia) +- refactor: use `to_ident()` instead of `clone().into_ident()` for borrowed Words [#2177](https://github.com/apache/datafusion-sqlparser-rs/pull/2177) (andygrove) +- Refactor: replace more `dialect_of!` checks with `Dialect` trait methods [#2175](https://github.com/apache/datafusion-sqlparser-rs/pull/2175) (andygrove) +- minor: reduce unnecessary string allocations [#2178](https://github.com/apache/datafusion-sqlparser-rs/pull/2178) (andygrove) +- PostgreSQL: Support force row level security [#2169](https://github.com/apache/datafusion-sqlparser-rs/pull/2169) (isaacparker0) +- PostgreSQL: Add support for `*` (descendant) option in TRUNCATE [#2181](https://github.com/apache/datafusion-sqlparser-rs/pull/2181) (mvzink) +- Fix identifier parsing not breaking on the `|>` pipe operator [#2156](https://github.com/apache/datafusion-sqlparser-rs/pull/2156) (alexander-beedie) +- [MySQL, Oracle] Parse optimizer hints [#2162](https://github.com/apache/datafusion-sqlparser-rs/pull/2162) (xitep) +- Redshift: Support implicit string concatenation using newline [#2167](https://github.com/apache/datafusion-sqlparser-rs/pull/2167) (yoavcloud) +- PostgreSQL: Fix REPLICA IDENTITY to use NOTHING [#2179](https://github.com/apache/datafusion-sqlparser-rs/pull/2179) (mvzink) +- Add ENFORCED/NOT ENFORCED support for column-level CHECK constraints [#2180](https://github.com/apache/datafusion-sqlparser-rs/pull/2180) (mvzink) +- Implement `core::error::Error` for `ParserError` and `TokenizerError` [#2189](https://github.com/apache/datafusion-sqlparser-rs/pull/2189) (LucaCappelletti94) +- Moved more structs outside of Statement to facilitate reuse [#2188](https://github.com/apache/datafusion-sqlparser-rs/pull/2188) (LucaCappelletti94) +- Fix parsing cast operator after 
parenthesized `DEFAULT` expression [#2168](https://github.com/apache/datafusion-sqlparser-rs/pull/2168) (isaacparker0) +- Streamlined derivation of new `Dialect` objects [#2174](https://github.com/apache/datafusion-sqlparser-rs/pull/2174) (alexander-beedie) +- MSSQL: Support standalone BEGIN...END blocks [#2186](https://github.com/apache/datafusion-sqlparser-rs/pull/2186) (guan404ming) +- MySQL: Add support for `SELECT` modifiers [#2172](https://github.com/apache/datafusion-sqlparser-rs/pull/2172) (mvzink) +- MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE [#2182](https://github.com/apache/datafusion-sqlparser-rs/pull/2182) (mvzink) +- [Oracle] Support hierarchical queries [#2185](https://github.com/apache/datafusion-sqlparser-rs/pull/2185) (xitep) +- MySQL: Allow optional constraint name after CONSTRAINT keyword [#2183](https://github.com/apache/datafusion-sqlparser-rs/pull/2183) (mvzink) +- Added missing derives to dialect marker structs [#2191](https://github.com/apache/datafusion-sqlparser-rs/pull/2191) (LucaCappelletti94) +- Fixed overflow error, recursion counter was not included for parenthesis [#2199](https://github.com/apache/datafusion-sqlparser-rs/pull/2199) (LucaCappelletti94) +- Add support for C-style comments [#2034](https://github.com/apache/datafusion-sqlparser-rs/pull/2034) (altmannmarcelo) +- PostgreSQL: Support PostgreSQL ANALYZE with optional table and column [#2187](https://github.com/apache/datafusion-sqlparser-rs/pull/2187) (guan404ming) +- Add Tokenizer custom token mapper support [#2184](https://github.com/apache/datafusion-sqlparser-rs/pull/2184) (askalt) +- Fix MAP literals parsing [#2205](https://github.com/apache/datafusion-sqlparser-rs/pull/2205) (Samyak2) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 9 Luca Cappelletti + 9 Yoav Cohen + 8 Michael Victor Zink + 7 xitep + 6 Andy Grove + 4 Andriy Romanov + 2 Alexander Beedie + 2 Andrew Lamb + 2 Guan-Ming (Wesley) Chiu + 2 James Vorderbruggen + 2 Samyak Sarnayak + 2 isaacparker0 + 2 jnlt3 + 1 Albert Skalt + 1 Denis Goncharenko + 1 Filipe Guerreiro + 1 Louis Vialar + 1 Marcelo Altmann + 1 Martin Grigorov + 1 Ophir LOJKINE + 1 Simon Sawert + 1 finchxxia +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + diff --git a/derive/Cargo.toml b/derive/Cargo.toml index 549477041b..9dfa5daf82 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "sqlparser_derive" description = "Procedural (proc) macros for sqlparser" -version = "0.4.0" +version = "0.5.0" authors = ["sqlparser-rs authors"] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser_derive/" @@ -36,6 +36,6 @@ edition = "2021" proc-macro = true [dependencies] -syn = { version = "2.0", default-features = false, features = ["printing", "parsing", "derive", "proc-macro"] } +syn = { version = "2.0", default-features = false, features = ["full", "printing", "parsing", "derive", "proc-macro", "clone-impls"] } proc-macro2 = "1.0" quote = "1.0" diff --git a/derive/src/dialect.rs b/derive/src/dialect.rs new file mode 100644 index 0000000000..9066bf9645 --- /dev/null +++ b/derive/src/dialect.rs @@ -0,0 +1,352 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Implementation of the `derive_dialect!` macro for creating custom SQL dialects. + +use proc_macro2::TokenStream; +use quote::{quote, quote_spanned}; +use std::collections::HashSet; +use syn::{ + braced, + parse::{Parse, ParseStream}, + Error, File, FnArg, Ident, Item, LitBool, LitChar, Pat, ReturnType, Signature, Token, + TraitItem, Type, +}; + +/// Override value types supported by the macro +pub(crate) enum Override { + Bool(LitBool), + Char(LitChar), + None, +} + +/// Parsed input for the `derive_dialect!` macro +pub(crate) struct DeriveDialectInput { + pub name: Ident, + pub base: Type, + pub preserve_type_id: bool, + pub overrides: Vec<(Ident, Override)>, +} + +/// `Dialect` trait method attrs +struct DialectMethod { + name: Ident, + signature: Signature, +} + +impl Parse for DeriveDialectInput { + fn parse(input: ParseStream) -> syn::Result { + let name: Ident = input.parse()?; + input.parse::()?; + let base: Type = input.parse()?; + + let mut preserve_type_id = false; + let mut overrides = Vec::new(); + + while input.peek(Token![,]) { + input.parse::()?; + if input.is_empty() { + break; + } + if input.peek(Ident) { + let ident: Ident = input.parse()?; + match ident.to_string().as_str() { + "preserve_type_id" => { + input.parse::()?; + preserve_type_id = input.parse::()?.value(); + } + "overrides" => { + input.parse::()?; + let content; + braced!(content in input); + while !content.is_empty() { + let key: Ident = content.parse()?; + content.parse::()?; + let value = if content.peek(LitBool) { + Override::Bool(content.parse()?) 
+ } else if content.peek(LitChar) { + Override::Char(content.parse()?) + } else if content.peek(Ident) { + let ident: Ident = content.parse()?; + if ident == "None" { + Override::None + } else { + return Err(Error::new( + ident.span(), + format!("Expected `true`, `false`, a char, or `None`, found `{ident}`"), + )); + } + } else { + return Err( + content.error("Expected `true`, `false`, a char, or `None`") + ); + }; + overrides.push((key, value)); + if content.peek(Token![,]) { + content.parse::()?; + } + } + } + other => { + return Err(Error::new(ident.span(), format!( + "Unknown argument `{other}`. Expected `preserve_type_id` or `overrides`." + ))); + } + } + } + } + Ok(DeriveDialectInput { + name, + base, + preserve_type_id, + overrides, + }) + } +} + +/// Entry point for the `derive_dialect!` macro +pub(crate) fn derive_dialect(input: DeriveDialectInput) -> proc_macro::TokenStream { + match derive_dialect_inner(input) { + Ok(tokens) => tokens.into(), + Err(e) => e.to_compile_error().into(), + } +} + +fn derive_dialect_inner(input: DeriveDialectInput) -> syn::Result { + let call_site = proc_macro2::Span::call_site(); + + let source = read_dialect_mod_file() + .map_err(|e| Error::new(call_site, format!("Failed to read dialect/mod.rs: {e}")))?; + let file: File = syn::parse_str::(&source) + .map_err(|e| Error::new(call_site, format!("Failed to parse source: {e}")))?; + let methods = extract_dialect_methods(&file)?; + + // Validate overrides + let bool_names: HashSet<_> = methods + .iter() + .filter(|m| is_bool_method(&m.signature)) + .map(|m| m.name.to_string()) + .collect(); + for (key, value) in &input.overrides { + let key_str = key.to_string(); + match value { + Override::Bool(_) if !bool_names.contains(&key_str) => { + return Err(Error::new( + key.span(), + format!("Unknown boolean method `{key_str}`"), + )); + } + Override::Char(_) | Override::None if key_str != "identifier_quote_style" => { + return Err(Error::new( + key.span(), + format!("Char/None only 
valid for `identifier_quote_style`, not `{key_str}`"), + )); + } + _ => {} + } + } + Ok(generate_derived_dialect(&input, &methods)) +} + +/// Generate the complete derived `Dialect` implementation +fn generate_derived_dialect(input: &DeriveDialectInput, methods: &[DialectMethod]) -> TokenStream { + let name = &input.name; + let base = &input.base; + + // Helper to find an override by method name + let find_override = |method_name: &str| { + input + .overrides + .iter() + .find(|(k, _)| k == method_name) + .map(|(_, v)| v) + }; + + // Helper to generate delegation to base dialect + let delegate = |method: &DialectMethod| { + let sig = &method.signature; + let method_name = &method.name; + let params = extract_param_names(sig); + quote_spanned! { method_name.span() => #sig { self.dialect.#method_name(#(#params),*) } } + }; + + // Generate the struct + let struct_def = quote_spanned! { name.span() => + #[derive(Debug, Default)] + pub struct #name { + dialect: #base, + } + impl #name { + pub fn new() -> Self { Self::default() } + } + }; + + // Generate TypeId method body + let type_id_body = if input.preserve_type_id { + quote! { Dialect::dialect(&self.dialect) } + } else { + quote! { ::core::any::TypeId::of::<#name>() } + }; + + // Generate method implementations + let method_impls = methods.iter().map(|method| { + let method_name = &method.name; + match find_override(&method_name.to_string()) { + Some(Override::Bool(value)) => { + quote_spanned! { method_name.span() => fn #method_name(&self) -> bool { #value } } + } + Some(Override::Char(c)) => { + quote_spanned! { method_name.span() => + fn identifier_quote_style(&self, _: &str) -> Option { Some(#c) } + } + } + Some(Override::None) => { + quote_spanned! { method_name.span() => + fn identifier_quote_style(&self, _: &str) -> Option { None } + } + } + None => delegate(method), + } + }); + + // Wrap impl in a const block with scoped imports so types resolve without qualification + quote! 
{
+        #struct_def
+        const _: () = {
+            use ::core::iter::Peekable;
+            use ::core::str::Chars;
+            use sqlparser::ast::{ColumnOption, Expr, GranteesType, Ident, ObjectNamePart, Statement};
+            use sqlparser::dialect::{Dialect, Precedence};
+            use sqlparser::keywords::Keyword;
+            use sqlparser::parser::{Parser, ParserError};
+
+            impl Dialect for #name {
+                fn dialect(&self) -> ::core::any::TypeId { #type_id_body }
+                #(#method_impls)*
+            }
+        };
+    }
+}
+
+/// Extract parameter names from a method signature (excluding self)
+fn extract_param_names(sig: &Signature) -> Vec<&Ident> {
+    sig.inputs
+        .iter()
+        .filter_map(|arg| match arg {
+            FnArg::Typed(pt) => match pt.pat.as_ref() {
+                Pat::Ident(pi) => Some(&pi.ident),
+                _ => None,
+            },
+            _ => None,
+        })
+        .collect()
+}
+
+/// Read the `dialect/mod.rs` file that contains the Dialect trait.
+///
+/// Searches for the file in the following order:
+/// 1. `$CARGO_MANIFEST_DIR/src/dialect/mod.rs` - works when the macro is
+///    invoked from within the `sqlparser` crate itself (e.g. in tests).
+/// 2. `<sqlparser_derive dir>/../src/dialect/mod.rs` - works when
+///    `sqlparser_derive` lives in a workspace alongside the main crate
+///    (the standard `derive/` layout).
+/// 3. Sibling `sqlparser-<version>` directories of the compiled
+///    `sqlparser_derive` crate in the Cargo registry, highest version
+///    first - works when an external crate uses `derive_dialect!` via a
+///    registry dependency.
+///
+/// # Errors
+///
+/// Returns a human-readable message when none of the candidate paths can be
+/// read.
+fn read_dialect_mod_file() -> Result<String, String> {
+    use std::path::{Path, PathBuf};
+
+    const DERIVE_CRATE_DIR: &str = env!("CARGO_MANIFEST_DIR");
+    let derive_dir = Path::new(DERIVE_CRATE_DIR);
+    let mut candidates: Vec<PathBuf> = Vec::new();
+
+    // The crate being compiled (eg: within sqlparser).
+    if let Ok(manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
+        candidates.push(Path::new(&manifest_dir).join("src/dialect/mod.rs"));
+    }
+    // Workspace layout: the main crate is the parent of `derive/`.
+    candidates.push(derive_dir.join("../src/dialect/mod.rs"));
+
+    // Cargo registry: look for sibling `sqlparser-<version>` directories, newest first.
+    // Versions are ordered by their numeric components: a plain string sort would
+    // rank `sqlparser-0.9.0` above `sqlparser-0.61.0` and pick a stale `Dialect` trait.
+    if let Some(parent) = derive_dir.parent() {
+        if let Ok(entries) = std::fs::read_dir(parent) {
+            let mut siblings: Vec<_> = entries
+                .filter_map(|e| e.ok())
+                .filter_map(|e| {
+                    // Skips anything that is not `sqlparser-<numeric version>`,
+                    // e.g. `sqlparser-derive-*` or unrelated crates sharing the prefix.
+                    let version = registry_version(&e.file_name().to_string_lossy())?;
+                    Some((version, e))
+                })
+                .collect();
+            siblings.sort_by(|a, b| b.0.cmp(&a.0));
+            candidates.extend(
+                siblings
+                    .into_iter()
+                    .map(|(_, e)| e.path().join("src/dialect/mod.rs")),
+            );
+        }
+    }
+    for path in &candidates {
+        if let Ok(content) = std::fs::read_to_string(path) {
+            return Ok(content);
+        }
+    }
+    Err(format!(
+        "Could not find `sqlparser` dialect/mod.rs file. \
+         Searched in $CARGO_MANIFEST_DIR/src/dialect/mod.rs and \
+         the `sqlparser_derive` crate at {DERIVE_CRATE_DIR}"
+    ))
+}
+
+/// Parse the version out of a registry directory name such as `sqlparser-0.61.0`.
+///
+/// Returns the dot-separated numeric components (`[0, 61, 0]`), which compare in
+/// version order, or `None` when the name is not `sqlparser-<numeric version>`.
+/// A pre-release or build suffix (`-rc.1`, `+meta`) is ignored.
+fn registry_version(dir_name: &str) -> Option<Vec<u64>> {
+    let version = dir_name.strip_prefix("sqlparser-")?;
+    let core = version.split(|c: char| c == '-' || c == '+').next()?;
+    core.split('.').map(|part| part.parse().ok()).collect()
+}
+
+/// Extract all methods from the `Dialect` trait (excluding `dialect` for TypeId)
+fn extract_dialect_methods(file: &File) -> Result<Vec<DialectMethod>, Error> {
+    let dialect_trait = file
+        .items
+        .iter()
+        .find_map(|item| match item {
+            Item::Trait(t) if t.ident == "Dialect" => Some(t),
+            _ => None,
+        })
+        .ok_or_else(|| Error::new(proc_macro2::Span::call_site(), "Dialect trait not found"))?;
+
+    let mut methods: Vec<_> = dialect_trait
+        .items
+        .iter()
+        .filter_map(|item| match item {
+            TraitItem::Fn(m) if m.sig.ident != "dialect" => Some(DialectMethod {
+                name: m.sig.ident.clone(),
+                signature: m.sig.clone(),
+            }),
+            _ => None,
+        })
+        .collect();
+    methods.sort_by_key(|m| m.name.to_string());
+    Ok(methods)
+}
+
+/// Check if a method signature is `fn name(&self) -> bool`
+fn is_bool_method(sig: &Signature) -> bool {
+    sig.inputs.len() == 1
+        && matches!(
+            sig.inputs.first(),
+            Some(FnArg::Receiver(r)) if r.reference.is_some() && r.mutability.is_none()
+        )
+        && matches!(
+            &sig.output,
+            ReturnType::Type(_, ty) if matches!(ty.as_ref(),
Type::Path(p) if p.path.is_ident("bool")) + ) +} diff --git a/derive/src/lib.rs b/derive/src/lib.rs index 08c5c5db4b..e3eaeea6d5 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -15,22 +15,25 @@ // specific language governing permissions and limitations // under the License. -use proc_macro2::TokenStream; -use quote::{format_ident, quote, quote_spanned, ToTokens}; -use syn::spanned::Spanned; -use syn::{ - parse::{Parse, ParseStream}, - parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics, - Ident, Index, LitStr, Meta, Token, Type, TypePath, -}; -use syn::{Path, PathArguments}; +//! Procedural macros for sqlparser. +//! +//! This crate provides: +//! - [`Visit`] and [`VisitMut`] derive macros for AST traversal. +//! - [`derive_dialect!`] macro for creating custom SQL dialects. -/// Implementation of `[#derive(Visit)]` +use quote::quote; +use syn::parse_macro_input; + +mod dialect; +mod visit; + +/// Implementation of `#[derive(VisitMut)]` #[proc_macro_derive(VisitMut, attributes(visit))] pub fn derive_visit_mut(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - derive_visit( + let input = parse_macro_input!(input as syn::DeriveInput); + visit::derive_visit( input, - &VisitType { + &visit::VisitType { visit_trait: quote!(VisitMut), visitor_trait: quote!(VisitorMut), modifier: Some(quote!(mut)), @@ -38,12 +41,13 @@ pub fn derive_visit_mut(input: proc_macro::TokenStream) -> proc_macro::TokenStre ) } -/// Implementation of `[#derive(Visit)]` +/// Implementation of `#[derive(Visit)]` #[proc_macro_derive(Visit, attributes(visit))] pub fn derive_visit_immutable(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - derive_visit( + let input = parse_macro_input!(input as syn::DeriveInput); + visit::derive_visit( input, - &VisitType { + &visit::VisitType { visit_trait: quote!(Visit), visitor_trait: quote!(Visitor), modifier: None, @@ -51,241 +55,9 @@ pub fn derive_visit_immutable(input: 
proc_macro::TokenStream) -> proc_macro::Tok ) } -struct VisitType { - visit_trait: TokenStream, - visitor_trait: TokenStream, - modifier: Option, -} - -fn derive_visit(input: proc_macro::TokenStream, visit_type: &VisitType) -> proc_macro::TokenStream { - // Parse the input tokens into a syntax tree. - let input = parse_macro_input!(input as DeriveInput); - let name = input.ident; - - let VisitType { - visit_trait, - visitor_trait, - modifier, - } = visit_type; - - let attributes = Attributes::parse(&input.attrs); - // Add a bound `T: Visit` to every type parameter T. - let generics = add_trait_bounds(input.generics, visit_type); - let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); - - let (pre_visit, post_visit) = attributes.visit(quote!(self)); - let children = visit_children(&input.data, visit_type); - - let expanded = quote! { - // The generated impl. - // Note that it uses [`recursive::recursive`] to protect from stack overflow. - // See tests in https://github.com/apache/datafusion-sqlparser-rs/pull/1522/ for more info. 
- impl #impl_generics sqlparser::ast::#visit_trait for #name #ty_generics #where_clause { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] - fn visit( - &#modifier self, - visitor: &mut V - ) -> ::std::ops::ControlFlow { - #pre_visit - #children - #post_visit - ::std::ops::ControlFlow::Continue(()) - } - } - }; - - proc_macro::TokenStream::from(expanded) -} - -/// Parses attributes that can be provided to this macro -/// -/// `#[visit(leaf, with = "visit_expr")]` -#[derive(Default)] -struct Attributes { - /// Content for the `with` attribute - with: Option, -} - -struct WithIdent { - with: Option, -} -impl Parse for WithIdent { - fn parse(input: ParseStream) -> Result { - let mut result = WithIdent { with: None }; - let ident = input.parse::()?; - if ident != "with" { - return Err(syn::Error::new( - ident.span(), - "Expected identifier to be `with`", - )); - } - input.parse::()?; - let s = input.parse::()?; - result.with = Some(format_ident!("{}", s.value(), span = s.span())); - Ok(result) - } -} - -impl Attributes { - fn parse(attrs: &[Attribute]) -> Self { - let mut out = Self::default(); - for attr in attrs { - if let Meta::List(ref metalist) = attr.meta { - if metalist.path.is_ident("visit") { - match syn::parse2::(metalist.tokens.clone()) { - Ok(with_ident) => { - out.with = with_ident.with; - } - Err(e) => { - panic!("{}", e); - } - } - } - } - } - out - } - - /// Returns the pre and post visit token streams - fn visit(&self, s: TokenStream) -> (Option, Option) { - let pre_visit = self.with.as_ref().map(|m| { - let m = format_ident!("pre_{}", m); - quote!(visitor.#m(#s)?;) - }); - let post_visit = self.with.as_ref().map(|m| { - let m = format_ident!("post_{}", m); - quote!(visitor.#m(#s)?;) - }); - (pre_visit, post_visit) - } -} - -// Add a bound `T: Visit` to every type parameter T. -fn add_trait_bounds(mut generics: Generics, VisitType { visit_trait, .. 
}: &VisitType) -> Generics { - for param in &mut generics.params { - if let GenericParam::Type(ref mut type_param) = *param { - type_param - .bounds - .push(parse_quote!(sqlparser::ast::#visit_trait)); - } - } - generics -} - -// Generate the body of the visit implementation for the given type -fn visit_children( - data: &Data, - VisitType { - visit_trait, - modifier, - .. - }: &VisitType, -) -> TokenStream { - match data { - Data::Struct(data) => match &data.fields { - Fields::Named(fields) => { - let recurse = fields.named.iter().map(|f| { - let name = &f.ident; - let is_option = is_option(&f.ty); - let attributes = Attributes::parse(&f.attrs); - if is_option && attributes.with.is_some() { - let (pre_visit, post_visit) = attributes.visit(quote!(value)); - quote_spanned!(f.span() => - if let Some(value) = &#modifier self.#name { - #pre_visit sqlparser::ast::#visit_trait::visit(value, visitor)?; #post_visit - } - ) - } else { - let (pre_visit, post_visit) = attributes.visit(quote!(&#modifier self.#name)); - quote_spanned!(f.span() => - #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#name, visitor)?; #post_visit - ) - } - }); - quote! { - #(#recurse)* - } - } - Fields::Unnamed(fields) => { - let recurse = fields.unnamed.iter().enumerate().map(|(i, f)| { - let index = Index::from(i); - let attributes = Attributes::parse(&f.attrs); - let (pre_visit, post_visit) = attributes.visit(quote!(&self.#index)); - quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#index, visitor)?; #post_visit) - }); - quote! 
{ - #(#recurse)* - } - } - Fields::Unit => { - quote!() - } - }, - Data::Enum(data) => { - let statements = data.variants.iter().map(|v| { - let name = &v.ident; - match &v.fields { - Fields::Named(fields) => { - let names = fields.named.iter().map(|f| &f.ident); - let visit = fields.named.iter().map(|f| { - let name = &f.ident; - let attributes = Attributes::parse(&f.attrs); - let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); - quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) - }); - - quote!( - Self::#name { #(#names),* } => { - #(#visit)* - } - ) - } - Fields::Unnamed(fields) => { - let names = fields.unnamed.iter().enumerate().map(|(i, f)| format_ident!("_{}", i, span = f.span())); - let visit = fields.unnamed.iter().enumerate().map(|(i, f)| { - let name = format_ident!("_{}", i); - let attributes = Attributes::parse(&f.attrs); - let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); - quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) - }); - - quote! { - Self::#name ( #(#names),*) => { - #(#visit)* - } - } - } - Fields::Unit => { - quote! { - Self::#name => {} - } - } - } - }); - - quote! { - match self { - #(#statements),* - } - } - } - Data::Union(_) => unimplemented!(), - } -} - -fn is_option(ty: &Type) -> bool { - if let Type::Path(TypePath { - path: Path { segments, .. }, - .. - }) = ty - { - if let Some(segment) = segments.last() { - if segment.ident == "Option" { - if let PathArguments::AngleBracketed(args) = &segment.arguments { - return args.args.len() == 1; - } - } - } - } - false +/// Procedural macro for deriving new SQL dialects. 
+#[proc_macro] +pub fn derive_dialect(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as dialect::DeriveDialectInput); + dialect::derive_dialect(input) } diff --git a/derive/src/visit.rs b/derive/src/visit.rs new file mode 100644 index 0000000000..baf3eb583b --- /dev/null +++ b/derive/src/visit.rs @@ -0,0 +1,268 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Implementation of the `Visit` and `VisitMut` derive macros. 
+ +use proc_macro2::TokenStream; +use quote::{format_ident, quote, quote_spanned, ToTokens}; +use syn::spanned::Spanned; +use syn::{ + parse::{Parse, ParseStream}, + parse_quote, Attribute, Data, Fields, GenericParam, Generics, Ident, Index, LitStr, Meta, + Token, Type, TypePath, +}; +use syn::{Path, PathArguments}; + +pub(crate) struct VisitType { + pub visit_trait: TokenStream, + pub visitor_trait: TokenStream, + pub modifier: Option, +} + +pub(crate) fn derive_visit( + input: syn::DeriveInput, + visit_type: &VisitType, +) -> proc_macro::TokenStream { + let name = input.ident; + + let VisitType { + visit_trait, + visitor_trait, + modifier, + } = visit_type; + + let attributes = Attributes::parse(&input.attrs); + // Add a bound `T: Visit` to every type parameter T. + let generics = add_trait_bounds(input.generics, visit_type); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let (pre_visit, post_visit) = attributes.visit(quote!(self)); + let children = visit_children(&input.data, visit_type); + + let expanded = quote! { + // The generated impl. + // Note that it uses [`recursive::recursive`] to protect from stack overflow. + // See tests in https://github.com/apache/datafusion-sqlparser-rs/pull/1522/ for more info. 
+ impl #impl_generics sqlparser::ast::#visit_trait for #name #ty_generics #where_clause { + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + fn visit( + &#modifier self, + visitor: &mut V + ) -> ::std::ops::ControlFlow { + #pre_visit + #children + #post_visit + ::std::ops::ControlFlow::Continue(()) + } + } + }; + + proc_macro::TokenStream::from(expanded) +} + +/// Parses attributes that can be provided to this macro +/// +/// `#[visit(leaf, with = "visit_expr")]` +#[derive(Default)] +struct Attributes { + /// Content for the `with` attribute + with: Option, +} + +struct WithIdent { + with: Option, +} +impl Parse for WithIdent { + fn parse(input: ParseStream) -> Result { + let mut result = WithIdent { with: None }; + let ident = input.parse::()?; + if ident != "with" { + return Err(syn::Error::new( + ident.span(), + "Expected identifier to be `with`", + )); + } + input.parse::()?; + let s = input.parse::()?; + result.with = Some(format_ident!("{}", s.value(), span = s.span())); + Ok(result) + } +} + +impl Attributes { + fn parse(attrs: &[Attribute]) -> Self { + let mut out = Self::default(); + for attr in attrs { + if let Meta::List(ref metalist) = attr.meta { + if metalist.path.is_ident("visit") { + match syn::parse2::(metalist.tokens.clone()) { + Ok(with_ident) => { + out.with = with_ident.with; + } + Err(e) => { + panic!("{}", e); + } + } + } + } + } + out + } + + /// Returns the pre and post visit token streams + fn visit(&self, s: TokenStream) -> (Option, Option) { + let pre_visit = self.with.as_ref().map(|m| { + let m = format_ident!("pre_{}", m); + quote!(visitor.#m(#s)?;) + }); + let post_visit = self.with.as_ref().map(|m| { + let m = format_ident!("post_{}", m); + quote!(visitor.#m(#s)?;) + }); + (pre_visit, post_visit) + } +} + +// Add a bound `T: Visit` to every type parameter T. +fn add_trait_bounds(mut generics: Generics, VisitType { visit_trait, .. 
}: &VisitType) -> Generics { + for param in &mut generics.params { + if let GenericParam::Type(ref mut type_param) = *param { + type_param + .bounds + .push(parse_quote!(sqlparser::ast::#visit_trait)); + } + } + generics +} + +// Generate the body of the visit implementation for the given type; every field is borrowed as `&#modifier self.<field>` for both the hooks and the visit call +fn visit_children( + data: &Data, + VisitType { + visit_trait, + modifier, + .. + }: &VisitType, +) -> TokenStream { + match data { + Data::Struct(data) => match &data.fields { + Fields::Named(fields) => { + let recurse = fields.named.iter().map(|f| { + let name = &f.ident; + let is_option = is_option(&f.ty); + let attributes = Attributes::parse(&f.attrs); + if is_option && attributes.with.is_some() { + let (pre_visit, post_visit) = attributes.visit(quote!(value)); + quote_spanned!(f.span() => + if let Some(value) = &#modifier self.#name { + #pre_visit sqlparser::ast::#visit_trait::visit(value, visitor)?; #post_visit + } + ) + } else { + let (pre_visit, post_visit) = attributes.visit(quote!(&#modifier self.#name)); + quote_spanned!(f.span() => + #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#name, visitor)?; #post_visit + ) + } + }); + quote! { + #(#recurse)* + } + } + Fields::Unnamed(fields) => { + let recurse = fields.unnamed.iter().enumerate().map(|(i, f)| { + let index = Index::from(i); + let attributes = Attributes::parse(&f.attrs); + let (pre_visit, post_visit) = attributes.visit(quote!(&#modifier self.#index)); + quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#index, visitor)?; #post_visit) + }); + quote!
{ + #(#recurse)* + } + } + Fields::Unit => { + quote!() + } + }, + Data::Enum(data) => { + let statements = data.variants.iter().map(|v| { + let name = &v.ident; + match &v.fields { + Fields::Named(fields) => { + let names = fields.named.iter().map(|f| &f.ident); + let visit = fields.named.iter().map(|f| { + let name = &f.ident; + let attributes = Attributes::parse(&f.attrs); + let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); + quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) + }); + + quote!( + Self::#name { #(#names),* } => { + #(#visit)* + } + ) + } + Fields::Unnamed(fields) => { + let names = fields.unnamed.iter().enumerate().map(|(i, f)| format_ident!("_{}", i, span = f.span())); + let visit = fields.unnamed.iter().enumerate().map(|(i, f)| { + let name = format_ident!("_{}", i); + let attributes = Attributes::parse(&f.attrs); + let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); + quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) + }); + + quote! { + Self::#name ( #(#names),*) => { + #(#visit)* + } + } + } + Fields::Unit => { + quote! { + Self::#name => {} + } + } + } + }); + + quote! { + match self { + #(#statements),* + } + } + } + Data::Union(_) => unimplemented!(), + } +} + +fn is_option(ty: &Type) -> bool { + if let Type::Path(TypePath { + path: Path { segments, .. }, + .. + }) = ty + { + if let Some(segment) = segments.last() { + if segment.ident == "Option" { + if let PathArguments::AngleBracketed(args) = &segment.arguments { + return args.args.len() == 1; + } + } + } + } + false +} diff --git a/dev/release/release-tarball.sh b/dev/release/release-tarball.sh index e59b2776cc..8e0e40214b 100755 --- a/dev/release/release-tarball.sh +++ b/dev/release/release-tarball.sh @@ -43,6 +43,13 @@ fi version=$1 rc=$2 +read -r -p "Proceed to release tarball for ${version}-rc${rc}? 
[y/N]: " answer +answer=${answer:-no} +if [ "${answer}" != "y" ]; then + echo "Cancelled tarball release!" + exit 1 +fi + tmp_dir=tmp-apache-datafusion-dist echo "Recreate temporary directory: ${tmp_dir}" diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 089a6ffc7f..4e97c6e2a1 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -124,7 +124,7 @@ test_source_distribution() { cargo build cargo test --all-features - if ( find -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then + if ( find . -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then echo "Cargo.toml version should not contain SNAPSHOT for releases" exit 1 fi diff --git a/fuzz/fuzz_targets/fuzz_parse_sql.rs b/fuzz/fuzz_targets/fuzz_parse_sql.rs index 446b036cd7..15c198cb23 100644 --- a/fuzz/fuzz_targets/fuzz_parse_sql.rs +++ b/fuzz/fuzz_targets/fuzz_parse_sql.rs @@ -16,14 +16,35 @@ // under the License. use honggfuzz::fuzz; -use sqlparser::dialect::GenericDialect; +use sqlparser::dialect::{ + AnsiDialect, BigQueryDialect, ClickHouseDialect, DatabricksDialect, DuckDbDialect, + GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, OracleDialect, PostgreSqlDialect, + RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect, +}; use sqlparser::parser::Parser; fn main() { + let dialects: Vec> = vec![ + Box::new(AnsiDialect::default()), + Box::new(BigQueryDialect::default()), + Box::new(ClickHouseDialect::default()), + Box::new(DatabricksDialect::default()), + Box::new(DuckDbDialect::default()), + Box::new(GenericDialect::default()), + Box::new(HiveDialect::default()), + Box::new(MsSqlDialect::default()), + Box::new(MySqlDialect::default()), + Box::new(OracleDialect::default()), + Box::new(PostgreSqlDialect::default()), + Box::new(RedshiftSqlDialect::default()), + Box::new(SQLiteDialect::default()), + Box::new(SnowflakeDialect::default()), + ]; loop { fuzz!(|data: String| { - let dialect = GenericDialect {}; - let _ = 
Parser::parse_sql(&dialect, &data); + for dialect in &dialects { + let _ = Parser::parse_sql(dialect.as_ref(), &data); + } }); } } diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 6132ee4329..b52683aa55 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -17,7 +17,9 @@ use criterion::{criterion_group, criterion_main, Criterion}; use sqlparser::dialect::GenericDialect; +use sqlparser::keywords::Keyword; use sqlparser::parser::Parser; +use sqlparser::tokenizer::{Span, Word}; fn basic_queries(c: &mut Criterion) { let mut group = c.benchmark_group("sqlparser-rs parsing benchmark"); @@ -51,7 +53,7 @@ fn basic_queries(c: &mut Criterion) { let tables = (0..1000) .map(|n| format!("TABLE_{n}")) .collect::>() - .join(" JOIN "); + .join(" CROSS JOIN "); let where_condition = (0..1000) .map(|n| format!("COL_{n} = {n}")) .collect::>() @@ -82,5 +84,73 @@ fn basic_queries(c: &mut Criterion) { }); } -criterion_group!(benches, basic_queries); +/// Benchmark comparing `to_ident(&self)` vs `clone().into_ident(self)`. +/// +/// Both approaches have equivalent performance since the String clone dominates. +/// `to_ident()` is preferred for clearer code (one method call vs two). 
+fn word_to_ident(c: &mut Criterion) { + let mut group = c.benchmark_group("word_to_ident"); + + // Create Word instances with varying identifier lengths + let words: Vec = (0..100) + .map(|i| Word { + value: format!("identifier_name_with_number_{i}"), + quote_style: None, + keyword: Keyword::NoKeyword, + }) + .collect(); + let span = Span::empty(); + + // clone().into_ident(): clones entire Word struct, then moves the String value + group.bench_function("clone_into_ident_100x", |b| { + b.iter(|| { + for w in &words { + std::hint::black_box(w.clone().into_ident(span)); + } + }); + }); + + // to_ident(): clones only the String value directly into the Ident + group.bench_function("to_ident_100x", |b| { + b.iter(|| { + for w in &words { + std::hint::black_box(w.to_ident(span)); + } + }); + }); + + group.finish(); +} + +/// Benchmark parsing queries with many identifiers to show real-world impact +fn parse_many_identifiers(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_identifiers"); + let dialect = GenericDialect {}; + + // Query with many column references (identifiers) + let many_columns = (0..100) + .map(|n| format!("column_{n}")) + .collect::>() + .join(", "); + let query = format!("SELECT {many_columns} FROM my_table"); + + group.bench_function("select_100_columns", |b| { + b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(&query))); + }); + + // Query with many table.column references + let qualified_columns = (0..100) + .map(|n| format!("t{}.column_{n}", n % 5)) + .collect::>() + .join(", "); + let query_qualified = format!("SELECT {qualified_columns} FROM t0, t1, t2, t3, t4"); + + group.bench_function("select_100_qualified_columns", |b| { + b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(&query_qualified))); + }); + + group.finish(); +} + +criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers); criterion_main!(benches); diff --git a/src/ast/comments.rs b/src/ast/comments.rs new file mode 100644 
index 0000000000..7744c89e2c --- /dev/null +++ b/src/ast/comments.rs @@ -0,0 +1,338 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Provides a representation of source code comments in parsed SQL code. +//! +//! See [Comments::find] for an example. + +#[cfg(not(feature = "std"))] +use alloc::{string::String, vec::Vec}; + +use core::{ + ops::{Bound, Deref, RangeBounds}, + slice, +}; + +use crate::tokenizer::{Location, Span}; + +/// An opaque container for comments from parsed SQL source code. +#[derive(Default, Debug, Clone)] +pub struct Comments(Vec); + +impl Comments { + /// Accepts `comment` if it's the first or is located strictly after the + /// last accepted comment. In other words, this method will skip the + /// comment if it's coming out of order (as encountered in the parsed + /// source code.) + pub(crate) fn offer(&mut self, comment: CommentWithSpan) { + if self + .0 + .last() + .map(|last| last.span < comment.span) + .unwrap_or(true) + { + self.0.push(comment); + } + } + + /// Finds comments starting within the given location range. The order of + /// the iterator reflects the order of the comments as encountered in the parsed + /// source code. + /// + /// # Example + /// ```rust + /// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location}; + /// + /// let sql = r#"/* + /// header comment ... + /// ...
spanning multiple lines + /// */ + /// + /// -- first statement + /// SELECT 'hello' /* world */ FROM DUAL; + /// + /// -- second statement + /// SELECT 123 FROM DUAL; + /// + /// -- trailing comment + /// "#; + /// + /// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap(); + /// + /// // all comments appearing before line seven, i.e. before the first statement itself + /// assert_eq!( + /// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::>(), + /// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]); + /// + /// // all comments appearing within the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::>(), + /// &[" world "]); + /// + /// // all comments appearing within or after the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::>(), + /// &[" world ", " second statement\n", " trailing comment\n"]); + /// ``` + /// + /// The [Spanned](crate::ast::Spanned) trait allows you to access location + /// information for certain AST nodes. + pub fn find>(&self, range: R) -> Iter<'_> { + let (start, end) = ( + self.start_index(range.start_bound()), + self.end_index(range.end_bound()), + ); + debug_assert!((0..=self.0.len()).contains(&start)); + debug_assert!((0..=self.0.len()).contains(&end)); + // in case the user specified a reverse range + Iter(if start <= end { + self.0[start..end].iter() + } else { + self.0[0..0].iter() + }) + } + + /// Find the index of the first comment starting "before" the given location. + /// + /// The returned index is _inclusive_ and within the range of `0..=self.0.len()`. 
+ fn start_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Unbounded => 0, + } + } + + /// Find the index of the first comment starting "after" the given location. + /// + /// The returned index is _exclusive_ and within the range of `0..=self.0.len()`. + fn end_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Unbounded => self.0.len(), + } + } +} + +impl From for Vec { + fn from(comments: Comments) -> Self { + comments.0 + } +} + +/// A source code comment with information of its entire span. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct CommentWithSpan { + /// The source code comment itself + pub comment: Comment, + /// The span of the comment including its markers + pub span: Span, +} + +impl Deref for CommentWithSpan { + type Target = Comment; + + fn deref(&self) -> &Self::Target { + &self.comment + } +} + +/// A unified type of the different source code comment formats. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Comment { + /// A single line comment, typically introduced with a prefix and spanning + /// until end-of-line or end-of-file in the source code. + /// + /// Note: `content` will include the terminating new-line character, if any. + /// A single-line comment, typically introduced with a prefix and spanning + /// until end-of-line or end-of-file in the source code.
+ /// + /// Note: `content` will include the terminating new-line character, if any. + SingleLine { + /// The content of the comment (including trailing newline, if any). + content: String, + /// The prefix introducing the comment (e.g. `--`, `#`). + prefix: String, + }, + + /// A multi-line comment, typically enclosed in `/* .. */` markers. The + /// string represents the content excluding the markers. + MultiLine(String), +} + +impl Comment { + /// Retrieves the content of the comment as string slice. + pub fn as_str(&self) -> &str { + match self { + Comment::SingleLine { content, prefix: _ } => content.as_str(), + Comment::MultiLine(content) => content.as_str(), + } + } +} + +impl Deref for Comment { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +/// An opaque iterator implementation over comments served by [Comments::find]. +pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>); + +impl<'a> Iterator for Iter<'a> { + type Item = &'a CommentWithSpan; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_find() { + let comments = { + // ``` + // -- abc + // /* hello */--, world + // /* def + // ghi + // jkl + // */ + // ``` + let mut c = Comments(Vec::new()); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: " abc".into(), + prefix: "--".into(), + }, + span: Span::new((1, 1).into(), (1, 7).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" hello ".into()), + span: Span::new((2, 3).into(), (2, 14).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: ", world".into(), + prefix: "--".into(), + }, + span: Span::new((2, 14).into(), (2, 21).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()), + span: Span::new((3, 3).into(), (7, 1).into()), + }); + c + }; + + fn find>(comments: &Comments, range: R) -> Vec<&str> { + 
comments.find(range).map(|c| c.as_str()).collect::>() + } + + // ~ end-points only -------------------------------------------------- + assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new()); + assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]); + assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, ..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ start-points only ------------------------------------------------ + assert_eq!( + find(&comments, Location::new(1000, 1000)..), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(2, 14)..), + vec![", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(2, 15)..), + vec![" def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(0, 0)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + + // ~ ranges ----------------------------------------------------------- + assert_eq!( + find(&comments, Location::new(2, 1)..Location::new(1, 1)), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 3)), + vec![" abc"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 10)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 14)), + vec![" abc", " hello ", ", world"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ find everything 
-------------------------------------------------- + assert_eq!( + find(&comments, ..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + } +} diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 6da6a90d06..285eec5054 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -32,7 +32,9 @@ use super::{value::escape_single_quote_string, ColumnDef}; #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A member of an ENUM type. pub enum EnumMember { + /// Just a name. Name(String), /// ClickHouse allows to specify an integer value for each enum value. /// @@ -892,7 +894,7 @@ fn format_clickhouse_datetime_precision_and_timezone( } /// Type of brackets used for `STRUCT` literals. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum StructBracketKind { @@ -957,18 +959,31 @@ impl fmt::Display for TimezoneInfo { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IntervalFields { + /// `YEAR` field Year, + /// `MONTH` field Month, + /// `DAY` field Day, + /// `HOUR` field Hour, + /// `MINUTE` field Minute, + /// `SECOND` field Second, + /// `YEAR TO MONTH` field YearToMonth, + /// `DAY TO HOUR` field DayToHour, + /// `DAY TO MINUTE` field DayToMinute, + /// `DAY TO SECOND` field DayToSecond, + /// `HOUR TO MINUTE` field HourToMinute, + /// `HOUR TO SECOND` field HourToSecond, + /// `MINUTE TO SECOND` field MinuteToSecond, } @@ -1000,11 +1015,11 @@ impl fmt::Display for IntervalFields { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum 
ExactNumberInfo { - /// No additional information, e.g. `DECIMAL` + /// No additional information, e.g. `DECIMAL`. None, - /// Only precision information, e.g. `DECIMAL(10)` + /// Only precision information, e.g. `DECIMAL(10)`. Precision(u64), - /// Precision and scale information, e.g. `DECIMAL(10,2)` + /// Precision and scale information, e.g. `DECIMAL(10,2)`. PrecisionAndScale(u64, i64), } @@ -1031,13 +1046,14 @@ impl fmt::Display for ExactNumberInfo { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CharacterLength { + /// Integer length with optional unit (e.g. `CHAR(10)` or `VARCHAR(10 CHARACTERS)`). IntegerLength { /// Default (if VARYING) or maximum (if not VARYING) length length: u64, /// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly unit: Option, }, - /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server) + /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server). Max, } @@ -1087,12 +1103,16 @@ impl fmt::Display for CharLengthUnits { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Information about [binary length][1], including length and possibly unit. +/// +/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-length pub enum BinaryLength { + /// Integer length for binary types (e.g. `VARBINARY(100)`). IntegerLength { /// Default (if VARYING) length: u64, }, - /// VARBINARY(MAX) used in T-SQL (Microsoft SQL Server) + /// VARBINARY(MAX) used in T-SQL (Microsoft SQL Server). 
Max, } @@ -1118,13 +1138,13 @@ impl fmt::Display for BinaryLength { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ArrayElemTypeDef { - /// `ARRAY` + /// Use `ARRAY` style without an explicit element type. None, - /// `ARRAY` + /// Angle-bracket style, e.g. `ARRAY`. AngleBracket(Box), - /// `INT[]` or `INT[2]` + /// Square-bracket style, e.g. `INT[]` or `INT[2]`. SquareBracket(Box, Option), - /// `Array(Int64)` + /// Parenthesis style, e.g. `Array(Int64)`. Parenthesis(Box), } @@ -1136,12 +1156,19 @@ pub enum ArrayElemTypeDef { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeometricTypeKind { + /// Point geometry Point, + /// Line geometry Line, + /// Line segment geometry LineSegment, + /// Box geometry GeometricBox, + /// Path geometry GeometricPath, + /// Polygon geometry Polygon, + /// Circle geometry Circle, } diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs index d04875a733..3c50a81c06 100644 --- a/src/ast/dcl.rs +++ b/src/ast/dcl.rs @@ -29,7 +29,10 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use super::{display_comma_separated, Expr, Ident, Password, Spanned}; -use crate::ast::{display_separated, ObjectName}; +use crate::ast::{ + display_separated, CascadeOption, CurrentGrantsKind, GrantObjects, Grantee, ObjectName, + Privileges, +}; use crate::tokenizer::Span; /// An option in `ROLE` statement. @@ -39,15 +42,25 @@ use crate::tokenizer::Span; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum RoleOption { + /// Enable or disable BYPASSRLS. BypassRLS(bool), + /// Connection limit expression. ConnectionLimit(Expr), + /// CREATEDB flag. CreateDB(bool), + /// CREATEROLE flag. CreateRole(bool), + /// INHERIT flag. Inherit(bool), + /// LOGIN flag. 
Login(bool), + /// Password value or NULL password. Password(Password), + /// Replication privilege flag. Replication(bool), + /// SUPERUSER flag. SuperUser(bool), + /// `VALID UNTIL` expression. ValidUntil(Expr), } @@ -104,8 +117,11 @@ impl fmt::Display for RoleOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SetConfigValue { + /// Use the default value. Default, + /// Use the current value (`FROM CURRENT`). FromCurrent, + /// Set to the provided expression value. Value(Expr), } @@ -116,7 +132,9 @@ pub enum SetConfigValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ResetConfig { + /// Reset all configuration parameters. ALL, + /// Reset the named configuration parameter. ConfigName(ObjectName), } @@ -127,28 +145,48 @@ pub enum ResetConfig { pub enum AlterRoleOperation { /// Generic RenameRole { + /// Role name to rename. role_name: Ident, }, /// MS SQL Server /// AddMember { + /// Member name to add to the role. member_name: Ident, }, + /// MS SQL Server + /// + /// DropMember { + /// Member name to remove from the role. member_name: Ident, }, /// PostgreSQL /// WithOptions { + /// Role options to apply. options: Vec, }, + /// PostgreSQL + /// + /// + /// `SET configuration_parameter { TO | = } { value | DEFAULT }` Set { + /// Configuration name to set. config_name: ObjectName, + /// Value to assign to the configuration. config_value: SetConfigValue, + /// Optional database scope for the setting. in_database: Option, }, + /// PostgreSQL + /// + /// + /// `RESET configuration_parameter` | `RESET ALL` Reset { + /// Configuration to reset. config_name: ResetConfig, + /// Optional database scope for the reset. 
in_database: Option, }, } @@ -205,14 +243,22 @@ impl fmt::Display for AlterRoleOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Use { - Catalog(ObjectName), // e.g. `USE CATALOG foo.bar` - Schema(ObjectName), // e.g. `USE SCHEMA foo.bar` - Database(ObjectName), // e.g. `USE DATABASE foo.bar` - Warehouse(ObjectName), // e.g. `USE WAREHOUSE foo.bar` - Role(ObjectName), // e.g. `USE ROLE PUBLIC` - SecondaryRoles(SecondaryRoles), // e.g. `USE SECONDARY ROLES ALL` - Object(ObjectName), // e.g. `USE foo.bar` - Default, // e.g. `USE DEFAULT` + /// Switch to the given catalog (e.g. `USE CATALOG ...`). + Catalog(ObjectName), + /// Switch to the given schema (e.g. `USE SCHEMA ...`). + Schema(ObjectName), + /// Switch to the given database (e.g. `USE DATABASE ...`). + Database(ObjectName), + /// Switch to the given warehouse (e.g. `USE WAREHOUSE ...`). + Warehouse(ObjectName), + /// Switch to the given role (e.g. `USE ROLE ...`). + Role(ObjectName), + /// Use secondary roles specification (e.g. `USE SECONDARY ROLES ...`). + SecondaryRoles(SecondaryRoles), + /// Use the specified object (e.g. `USE foo.bar`). + Object(ObjectName), + /// Reset to default (e.g. `USE DEFAULT`). + Default, } impl fmt::Display for Use { @@ -239,8 +285,11 @@ impl fmt::Display for Use { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SecondaryRoles { + /// Use all secondary roles. All, + /// Use no secondary roles. None, + /// Explicit list of secondary roles. List(Vec), } @@ -260,25 +309,43 @@ impl fmt::Display for SecondaryRoles { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateRole { + /// Role names to create. pub names: Vec, + /// Whether `IF NOT EXISTS` was specified. 
pub if_not_exists: bool, // Postgres + /// Whether `LOGIN` was specified. pub login: Option, + /// Whether `INHERIT` was specified. pub inherit: Option, + /// Whether `BYPASSRLS` was specified. pub bypassrls: Option, + /// Optional password for the role. pub password: Option, + /// Whether `SUPERUSER` was specified. pub superuser: Option, + /// Whether `CREATEDB` was specified. pub create_db: Option, + /// Whether `CREATEROLE` was specified. pub create_role: Option, + /// Whether `REPLICATION` privilege was specified. pub replication: Option, + /// Optional connection limit expression. pub connection_limit: Option, + /// Optional account validity expression. pub valid_until: Option, + /// Members of `IN ROLE` clause. pub in_role: Vec, + /// Members of `IN GROUP` clause. pub in_group: Vec, + /// Roles listed in `ROLE` clause. pub role: Vec, + /// Users listed in `USER` clause. pub user: Vec, + /// Admin users listed in `ADMIN` clause. pub admin: Vec, // MSSQL + /// Optional authorization owner. pub authorization_owner: Option, } @@ -363,3 +430,99 @@ impl Spanned for CreateRole { Span::empty() } } + +/// GRANT privileges ON objects TO grantees +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Grant { + /// Privileges being granted. + pub privileges: Privileges, + /// Optional objects the privileges apply to. + pub objects: Option, + /// List of grantees receiving the privileges. + pub grantees: Vec, + /// Whether `WITH GRANT OPTION` is present. + pub with_grant_option: bool, + /// Optional `AS GRANTOR` identifier. + pub as_grantor: Option, + /// Optional `GRANTED BY` identifier. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dcl-statements) + pub granted_by: Option, + /// Optional `CURRENT GRANTS` modifier. 
+ /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/grant-privilege) + pub current_grants: Option, +} + +impl fmt::Display for Grant { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "GRANT {privileges}", privileges = self.privileges)?; + if let Some(ref objects) = self.objects { + write!(f, " ON {objects}")?; + } + write!(f, " TO {}", display_comma_separated(&self.grantees))?; + if let Some(ref current_grants) = self.current_grants { + write!(f, " {current_grants}")?; + } + if self.with_grant_option { + write!(f, " WITH GRANT OPTION")?; + } + if let Some(ref as_grantor) = self.as_grantor { + write!(f, " AS {as_grantor}")?; + } + if let Some(ref granted_by) = self.granted_by { + write!(f, " GRANTED BY {granted_by}")?; + } + Ok(()) + } +} + +impl From for crate::ast::Statement { + fn from(v: Grant) -> Self { + crate::ast::Statement::Grant(v) + } +} + +/// REVOKE privileges ON objects FROM grantees +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Revoke { + /// Privileges to revoke. + pub privileges: Privileges, + /// Optional objects from which to revoke. + pub objects: Option, + /// Grantees affected by the revoke. + pub grantees: Vec, + /// Optional `GRANTED BY` identifier. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dcl-statements) + pub granted_by: Option, + /// Optional `CASCADE`/`RESTRICT` behavior. 
+ pub cascade: Option, +} + +impl fmt::Display for Revoke { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "REVOKE {privileges}", privileges = self.privileges)?; + if let Some(ref objects) = self.objects { + write!(f, " ON {objects}")?; + } + write!(f, " FROM {}", display_comma_separated(&self.grantees))?; + if let Some(ref granted_by) = self.granted_by { + write!(f, " GRANTED BY {granted_by}")?; + } + if let Some(ref cascade) = self.cascade { + write!(f, " {cascade}")?; + } + Ok(()) + } +} + +impl From for crate::ast::Statement { + fn from(v: Revoke) -> Self { + crate::ast::Statement::Revoke(v) + } +} diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 0df53c144a..879740f03b 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -43,12 +43,13 @@ use crate::ast::{ }, ArgMode, AttachedToken, CommentDef, ConditionalStatements, CreateFunctionBody, CreateFunctionUsing, CreateTableLikeKind, CreateTableOptions, CreateViewParams, DataType, Expr, - FileFormat, FunctionBehavior, FunctionCalledOnNull, FunctionDesc, FunctionDeterminismSpecifier, - FunctionParallel, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, - HiveSetLocation, Ident, InitializeKind, MySQLColumnPosition, ObjectName, OnCommit, - OneOrManyWithParens, OperateFunctionArg, OrderByExpr, ProjectionSelect, Query, RefreshModeKind, - RowAccessPolicy, SequenceOptions, Spanned, SqlOption, StorageSerializationPolicy, TableVersion, - Tag, TriggerEvent, TriggerExecBody, TriggerObject, TriggerPeriod, TriggerReferencing, Value, + FileFormat, FunctionBehavior, FunctionCalledOnNull, FunctionDefinitionSetParam, FunctionDesc, + FunctionDeterminismSpecifier, FunctionParallel, FunctionSecurity, HiveDistributionStyle, + HiveFormat, HiveIOFormat, HiveRowFormat, HiveSetLocation, Ident, InitializeKind, + MySQLColumnPosition, ObjectName, OnCommit, OneOrManyWithParens, OperateFunctionArg, + OrderByExpr, ProjectionSelect, Query, RefreshModeKind, RowAccessPolicy, SequenceOptions, + Spanned, SqlOption, 
StorageLifecyclePolicy, StorageSerializationPolicy, TableVersion, Tag, + TriggerEvent, TriggerExecBody, TriggerObject, TriggerPeriod, TriggerReferencing, Value, ValueWithSpan, WrappedCollection, }; use crate::display_utils::{DisplayCommaSeparated, Indent, NewLine, SpaceOrNewline}; @@ -60,8 +61,10 @@ use crate::tokenizer::{Span, Token}; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IndexColumn { + /// The indexed column expression. pub column: OrderByExpr, - pub operator_class: Option, + /// Optional operator class (index operator name). + pub operator_class: Option, } impl From for IndexColumn { @@ -96,16 +99,20 @@ impl fmt::Display for IndexColumn { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ReplicaIdentity { - None, + /// No replica identity (`REPLICA IDENTITY NOTHING`). + Nothing, + /// Full replica identity (`REPLICA IDENTITY FULL`). Full, + /// Default replica identity (`REPLICA IDENTITY DEFAULT`). Default, + /// Use the given index as replica identity (`REPLICA IDENTITY USING INDEX`). Index(Ident), } impl fmt::Display for ReplicaIdentity { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - ReplicaIdentity::None => f.write_str("NONE"), + ReplicaIdentity::Nothing => f.write_str("NOTHING"), ReplicaIdentity::Full => f.write_str("FULL"), ReplicaIdentity::Default => f.write_str("DEFAULT"), ReplicaIdentity::Index(idx) => write!(f, "USING INDEX {idx}"), @@ -120,7 +127,9 @@ impl fmt::Display for ReplicaIdentity { pub enum AlterTableOperation { /// `ADD [NOT VALID]` AddConstraint { + /// The table constraint to add. constraint: TableConstraint, + /// Whether the constraint should be marked `NOT VALID`. not_valid: bool, }, /// `ADD [COLUMN] [IF NOT EXISTS] ` @@ -139,8 +148,11 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. 
/// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#add-projection) AddProjection { + /// Whether `IF NOT EXISTS` was specified. if_not_exists: bool, + /// Name of the projection to add. name: Ident, + /// The projection's select clause. select: ProjectionSelect, }, /// `DROP PROJECTION [IF EXISTS] name` @@ -148,7 +160,9 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. /// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#drop-projection) DropProjection { + /// Whether `IF EXISTS` was specified. if_exists: bool, + /// Name of the projection to drop. name: Ident, }, /// `MATERIALIZE PROJECTION [IF EXISTS] name [IN PARTITION partition_name]` @@ -156,8 +170,11 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. /// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#materialize-projection) MaterializeProjection { + /// Whether `IF EXISTS` was specified. if_exists: bool, + /// Name of the projection to materialize. name: Ident, + /// Optional partition name to operate on. partition: Option, }, /// `CLEAR PROJECTION [IF EXISTS] name [IN PARTITION partition_name]` @@ -165,37 +182,50 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. /// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#clear-projection) ClearProjection { + /// Whether `IF EXISTS` was specified. if_exists: bool, + /// Name of the projection to clear. name: Ident, + /// Optional partition name to operate on. partition: Option, }, /// `DISABLE ROW LEVEL SECURITY` /// /// Note: this is a PostgreSQL-specific operation. 
+ /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) DisableRowLevelSecurity, /// `DISABLE RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. DisableRule { + /// Name of the rule to disable. name: Ident, }, /// `DISABLE TRIGGER [ trigger_name | ALL | USER ]` /// /// Note: this is a PostgreSQL-specific operation. DisableTrigger { + /// Name of the trigger to disable (or ALL/USER). name: Ident, }, /// `DROP CONSTRAINT [ IF EXISTS ] ` DropConstraint { + /// `IF EXISTS` flag for dropping the constraint. if_exists: bool, + /// Name of the constraint to drop. name: Ident, + /// Optional drop behavior (`CASCADE`/`RESTRICT`). drop_behavior: Option, }, /// `DROP [ COLUMN ] [ IF EXISTS ] [ , , ... ] [ CASCADE ]` DropColumn { + /// Whether the `COLUMN` keyword was present. has_column_keyword: bool, + /// Names of columns to drop. column_names: Vec, + /// Whether `IF EXISTS` was specified for the columns. if_exists: bool, + /// Optional drop behavior for the column removal. drop_behavior: Option, }, /// `ATTACH PART|PARTITION ` @@ -204,6 +234,7 @@ pub enum AlterTableOperation { AttachPartition { // PART is not a short form of PARTITION, it's a separate keyword // which represents a physical file on disk and partition is a logical entity. + /// Partition expression to attach. partition: Partition, }, /// `DETACH PART|PARTITION ` @@ -211,20 +242,25 @@ pub enum AlterTableOperation { /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#detach-partitionpart) DetachPartition { // See `AttachPartition` for more details + /// Partition expression to detach. partition: Partition, }, /// `FREEZE PARTITION ` /// Note: this is a ClickHouse-specific operation, please refer to /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#freeze-partition) FreezePartition { + /// Partition to freeze. 
partition: Partition, + /// Optional name for the freeze operation. with_name: Option, }, /// `UNFREEZE PARTITION ` /// Note: this is a ClickHouse-specific operation, please refer to /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#unfreeze-partition) UnfreezePartition { + /// Partition to unfreeze. partition: Partition, + /// Optional name associated with the unfreeze operation. with_name: Option, }, /// `DROP PRIMARY KEY` @@ -232,6 +268,7 @@ pub enum AlterTableOperation { /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/alter-table.html) /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constraints-drop) DropPrimaryKey { + /// Optional drop behavior for the primary key (`CASCADE`/`RESTRICT`). drop_behavior: Option, }, /// `DROP FOREIGN KEY ` @@ -239,58 +276,80 @@ pub enum AlterTableOperation { /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/alter-table.html) /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constraints-drop) DropForeignKey { + /// Foreign key symbol/name to drop. name: Ident, + /// Optional drop behavior for the foreign key. drop_behavior: Option, }, /// `DROP INDEX ` /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html DropIndex { + /// Name of the index to drop. name: Ident, }, /// `ENABLE ALWAYS RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. EnableAlwaysRule { + /// Name of the rule to enable. name: Ident, }, /// `ENABLE ALWAYS TRIGGER trigger_name` /// /// Note: this is a PostgreSQL-specific operation. EnableAlwaysTrigger { + /// Name of the trigger to enable. name: Ident, }, /// `ENABLE REPLICA RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. EnableReplicaRule { + /// Name of the replica rule to enable. name: Ident, }, /// `ENABLE REPLICA TRIGGER trigger_name` /// /// Note: this is a PostgreSQL-specific operation. EnableReplicaTrigger { + /// Name of the replica trigger to enable. 
name: Ident, }, /// `ENABLE ROW LEVEL SECURITY` /// /// Note: this is a PostgreSQL-specific operation. + /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) EnableRowLevelSecurity, + /// `FORCE ROW LEVEL SECURITY` + /// + /// Note: this is a PostgreSQL-specific operation. + /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) + ForceRowLevelSecurity, + /// `NO FORCE ROW LEVEL SECURITY` + /// + /// Note: this is a PostgreSQL-specific operation. + /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) + NoForceRowLevelSecurity, /// `ENABLE RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. EnableRule { + /// Name of the rule to enable. name: Ident, }, /// `ENABLE TRIGGER [ trigger_name | ALL | USER ]` /// /// Note: this is a PostgreSQL-specific operation. EnableTrigger { + /// Name of the trigger to enable (or ALL/USER). name: Ident, }, /// `RENAME TO PARTITION (partition=val)` RenamePartitions { + /// Old partition expressions to be renamed. old_partitions: Vec, + /// New partition expressions corresponding to the old ones. new_partitions: Vec, }, /// REPLICA IDENTITY { DEFAULT | USING INDEX index_name | FULL | NOTHING } @@ -298,83 +357,125 @@ pub enum AlterTableOperation { /// Note: this is a PostgreSQL-specific operation. /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) ReplicaIdentity { + /// Replica identity setting to apply. identity: ReplicaIdentity, }, /// Add Partitions AddPartitions { + /// Whether `IF NOT EXISTS` was present when adding partitions. if_not_exists: bool, + /// New partitions to add. new_partitions: Vec, }, + /// `DROP PARTITIONS ...` / drop partitions from the table. DropPartitions { + /// Partitions to drop (expressions). 
partitions: Vec, + /// Whether `IF EXISTS` was specified for dropping partitions. if_exists: bool, }, /// `RENAME [ COLUMN ] TO ` RenameColumn { + /// Existing column name to rename. old_column_name: Ident, + /// New column name. new_column_name: Ident, }, /// `RENAME TO ` RenameTable { + /// The new table name or renaming kind. table_name: RenameTableNameKind, }, // CHANGE [ COLUMN ] [ ] + /// Change an existing column's name, type, and options. ChangeColumn { + /// Old column name. old_name: Ident, + /// New column name. new_name: Ident, + /// New data type for the column. data_type: DataType, + /// Column options to apply after the change. options: Vec, - /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] + /// MySQL-specific column position (`FIRST`/`AFTER`). column_position: Option, }, // CHANGE [ COLUMN ] [ ] + /// Modify an existing column's type and options. ModifyColumn { + /// Column name to modify. col_name: Ident, + /// New data type for the column. data_type: DataType, + /// Column options to set. options: Vec, - /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] + /// MySQL-specific column position (`FIRST`/`AFTER`). column_position: Option, }, /// `RENAME CONSTRAINT TO ` /// /// Note: this is a PostgreSQL-specific operation. + /// Rename a constraint on the table. RenameConstraint { + /// Existing constraint name. old_name: Ident, + /// New constraint name. new_name: Ident, }, /// `ALTER [ COLUMN ]` + /// Alter a specific column with the provided operation. AlterColumn { + /// The column to alter. column_name: Ident, + /// Operation to apply to the column. op: AlterColumnOperation, }, /// 'SWAP WITH ' /// /// Note: this is Snowflake specific SwapWith { + /// Table name to swap with. table_name: ObjectName, }, /// 'SET TBLPROPERTIES ( { property_key [ = ] property_val } [, ...] )' SetTblProperties { + /// Table properties specified as SQL options. 
table_properties: Vec, }, /// `OWNER TO { | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` /// /// Note: this is PostgreSQL-specific OwnerTo { + /// The new owner to assign to the table. new_owner: Owner, }, /// Snowflake table clustering options /// ClusterBy { + /// Expressions used for clustering the table. exprs: Vec, }, + /// Remove the clustering key from the table. DropClusteringKey, + /// Redshift `ALTER SORTKEY (column_list)` + /// + AlterSortKey { + /// Column references in the sort key. + columns: Vec, + }, + /// Suspend background reclustering operations. SuspendRecluster, + /// Resume background reclustering operations. ResumeRecluster, - /// `REFRESH` + /// `REFRESH [ '' ]` /// - /// Note: this is Snowflake specific for dynamic tables - Refresh, + /// Note: this is Snowflake specific for dynamic/external tables + /// + /// + Refresh { + /// Optional subpath for external table refresh + subpath: Option, + }, /// `SUSPEND` /// /// Note: this is Snowflake specific for dynamic tables @@ -389,7 +490,9 @@ pub enum AlterTableOperation { /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html Algorithm { + /// Whether the `=` sign was used (`ALGORITHM = ...`). equals: bool, + /// The algorithm to use for the alter operation (MySQL-specific). algorithm: AlterTableAlgorithm, }, @@ -399,7 +502,9 @@ pub enum AlterTableOperation { /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html Lock { + /// Whether the `=` sign was used (`LOCK = ...`). equals: bool, + /// The locking behavior to apply (MySQL-specific). lock: AlterTableLock, }, /// `AUTO_INCREMENT [=] ` @@ -408,11 +513,14 @@ pub enum AlterTableOperation { /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html AutoIncrement { + /// Whether the `=` sign was used (`AUTO_INCREMENT = ...`). equals: bool, + /// Value to set for the auto-increment counter. 
value: ValueWithSpan, }, /// `VALIDATE CONSTRAINT ` ValidateConstraint { + /// Name of the constraint to validate. name: Ident, }, /// Arbitrary parenthesized `SET` options. @@ -423,6 +531,7 @@ pub enum AlterTableOperation { /// ``` /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-altertable.html) SetOptionsParens { + /// Parenthesized options supplied to `SET (...)`. options: Vec, }, } @@ -434,12 +543,18 @@ pub enum AlterTableOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterPolicyOperation { + /// Rename the policy to `new_name`. Rename { + /// The new identifier for the policy. new_name: Ident, }, + /// Apply/modify policy properties. Apply { + /// Optional list of owners the policy applies to. to: Option>, + /// Optional `USING` expression for the policy. using: Option, + /// Optional `WITH CHECK` expression for the policy. with_check: Option, }, } @@ -473,13 +588,18 @@ impl fmt::Display for AlterPolicyOperation { /// [MySQL] `ALTER TABLE` algorithm. /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Algorithm option for `ALTER TABLE` operations (MySQL-specific). pub enum AlterTableAlgorithm { + /// Default algorithm selection. Default, + /// `INSTANT` algorithm. Instant, + /// `INPLACE` algorithm. Inplace, + /// `COPY` algorithm. Copy, } @@ -497,13 +617,18 @@ impl fmt::Display for AlterTableAlgorithm { /// [MySQL] `ALTER TABLE` lock. 
/// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Locking behavior for `ALTER TABLE` (MySQL-specific). pub enum AlterTableLock { + /// `DEFAULT` lock behavior. Default, + /// `NONE` lock. None, + /// `SHARED` lock. Shared, + /// `EXCLUSIVE` lock. Exclusive, } @@ -521,10 +646,15 @@ impl fmt::Display for AlterTableLock { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// New owner specification for `ALTER TABLE ... OWNER TO ...` pub enum Owner { + /// A specific user/role identifier. Ident(Ident), + /// `CURRENT_ROLE` keyword. CurrentRole, + /// `CURRENT_USER` keyword. CurrentUser, + /// `SESSION_USER` keyword. SessionUser, } @@ -542,8 +672,11 @@ impl fmt::Display for Owner { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// New connector owner specification for `ALTER CONNECTOR ... OWNER TO ...` pub enum AlterConnectorOwner { + /// `USER ` connector owner. User(Ident), + /// `ROLE ` connector owner. Role(Ident), } @@ -559,8 +692,13 @@ impl fmt::Display for AlterConnectorOwner { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Alterations that can be applied to an index. pub enum AlterIndexOperation { - RenameIndex { index_name: ObjectName }, + /// Rename the index to `index_name`. + RenameIndex { + /// The new name for the index. 
+ index_name: ObjectName, + }, } impl fmt::Display for AlterTableOperation { @@ -756,6 +894,12 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::EnableRowLevelSecurity => { write!(f, "ENABLE ROW LEVEL SECURITY") } + AlterTableOperation::ForceRowLevelSecurity => { + write!(f, "FORCE ROW LEVEL SECURITY") + } + AlterTableOperation::NoForceRowLevelSecurity => { + write!(f, "NO FORCE ROW LEVEL SECURITY") + } AlterTableOperation::EnableRule { name } => { write!(f, "ENABLE RULE {name}") } @@ -855,6 +999,10 @@ impl fmt::Display for AlterTableOperation { write!(f, "DROP CLUSTERING KEY")?; Ok(()) } + AlterTableOperation::AlterSortKey { columns } => { + write!(f, "ALTER SORTKEY({})", display_comma_separated(columns))?; + Ok(()) + } AlterTableOperation::SuspendRecluster => { write!(f, "SUSPEND RECLUSTER")?; Ok(()) @@ -863,8 +1011,12 @@ impl fmt::Display for AlterTableOperation { write!(f, "RESUME RECLUSTER")?; Ok(()) } - AlterTableOperation::Refresh => { - write!(f, "REFRESH") + AlterTableOperation::Refresh { subpath } => { + write!(f, "REFRESH")?; + if let Some(path) = subpath { + write!(f, " '{path}'")?; + } + Ok(()) } AlterTableOperation::Suspend => { write!(f, "SUSPEND") @@ -911,7 +1063,9 @@ impl fmt::Display for AlterIndexOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterType { + /// Name of the type being altered (may be schema-qualified). pub name: ObjectName, + /// The specific alteration operation to perform. pub operation: AlterTypeOperation, } @@ -920,8 +1074,11 @@ pub struct AlterType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterTypeOperation { + /// Rename the type. Rename(AlterTypeRename), + /// Add a new value to the type (for enum-like types). AddValue(AlterTypeAddValue), + /// Rename an existing value of the type. 
RenameValue(AlterTypeRenameValue), } @@ -930,6 +1087,7 @@ pub enum AlterTypeOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterTypeRename { + /// The new name for the type. pub new_name: Ident, } @@ -938,8 +1096,11 @@ pub struct AlterTypeRename { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterTypeAddValue { + /// If true, do not error when the value already exists (`IF NOT EXISTS`). pub if_not_exists: bool, + /// The identifier for the new value to add. pub value: Ident, + /// Optional relative position for the new value (`BEFORE` / `AFTER`). pub position: Option, } @@ -948,7 +1109,9 @@ pub struct AlterTypeAddValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterTypeAddValuePosition { + /// Place the new value before the given neighbor value. Before(Ident), + /// Place the new value after the given neighbor value. After(Ident), } @@ -957,7 +1120,9 @@ pub enum AlterTypeAddValuePosition { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterTypeRenameValue { + /// Existing value identifier to rename. pub from: Ident, + /// New identifier for the value. 
pub to: Ident, } @@ -995,6 +1160,107 @@ impl fmt::Display for AlterTypeOperation { } } +/// `ALTER OPERATOR` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperator { + /// Operator name (can be schema-qualified) + pub name: ObjectName, + /// Left operand type (`None` if no left operand) + pub left_type: Option, + /// Right operand type + pub right_type: DataType, + /// The operation to perform + pub operation: AlterOperatorOperation, +} + +/// An [AlterOperator] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorOperation { + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + /// Set the operator's schema name. 
+ SetSchema { + /// New schema name for the operator + schema_name: ObjectName, + }, + /// `SET ( options )` + Set { + /// List of operator options to set + options: Vec, + }, +} + +/// Option for `ALTER OPERATOR SET` operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorOption { + /// `RESTRICT = { res_proc | NONE }` + Restrict(Option), + /// `JOIN = { join_proc | NONE }` + Join(Option), + /// `COMMUTATOR = com_op` + Commutator(ObjectName), + /// `NEGATOR = neg_op` + Negator(ObjectName), + /// `HASHES` + Hashes, + /// `MERGES` + Merges, +} + +impl fmt::Display for AlterOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ALTER OPERATOR {} (", self.name)?; + if let Some(left_type) = &self.left_type { + write!(f, "{}", left_type)?; + } else { + write!(f, "NONE")?; + } + write!(f, ", {}) {}", self.right_type, self.operation) + } +} + +impl fmt::Display for AlterOperatorOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::OwnerTo(owner) => write!(f, "OWNER TO {}", owner), + Self::SetSchema { schema_name } => write!(f, "SET SCHEMA {}", schema_name), + Self::Set { options } => { + write!(f, "SET (")?; + for (i, option) in options.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", option)?; + } + write!(f, ")") + } + } + } +} + +impl fmt::Display for OperatorOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Restrict(Some(proc_name)) => write!(f, "RESTRICT = {}", proc_name), + Self::Restrict(None) => write!(f, "RESTRICT = NONE"), + Self::Join(Some(proc_name)) => write!(f, "JOIN = {}", proc_name), + Self::Join(None) => write!(f, "JOIN = NONE"), + Self::Commutator(op_name) => write!(f, "COMMUTATOR = {}", op_name), + Self::Negator(op_name) => write!(f, "NEGATOR = {}", op_name), + 
Self::Hashes => write!(f, "HASHES"), + Self::Merges => write!(f, "MERGES"), + } + } +} + /// An `ALTER COLUMN` (`Statement::AlterTable`) operation #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1005,15 +1271,20 @@ pub enum AlterColumnOperation { /// `DROP NOT NULL` DropNotNull, /// `SET DEFAULT ` - SetDefault { value: Expr }, + /// Set the column default value. + SetDefault { + /// Expression representing the new default value. + value: Expr, + }, /// `DROP DEFAULT` DropDefault, /// `[SET DATA] TYPE [USING ]` SetDataType { + /// Target data type for the column. data_type: DataType, - /// PostgreSQL specific + /// PostgreSQL-specific `USING ` expression for conversion. using: Option, - /// Set to true if the statement includes the `SET DATA TYPE` keywords + /// Set to true if the statement includes the `SET DATA TYPE` keywords. had_set: bool, }, @@ -1021,7 +1292,9 @@ pub enum AlterColumnOperation { /// /// Note: this is a PostgreSQL-specific operation. AddGenerated { + /// Optional `GENERATED AS` specifier (e.g. `ALWAYS` or `BY DEFAULT`). generated_as: Option, + /// Optional sequence options for identity generation. sequence_options: Option>, }, } @@ -1097,6 +1370,7 @@ pub enum KeyOrIndexDisplay { } impl KeyOrIndexDisplay { + /// Check if this is the `None` variant. pub fn is_none(self) -> bool { matches!(self, Self::None) } @@ -1136,12 +1410,19 @@ impl fmt::Display for KeyOrIndexDisplay { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IndexType { + /// B-Tree index (commonly default for many databases). BTree, + /// Hash index. Hash, + /// Generalized Inverted Index (GIN). GIN, + /// Generalized Search Tree (GiST) index. GiST, + /// Space-partitioned GiST (SPGiST) index. SPGiST, + /// Block Range Index (BRIN). BRIN, + /// Bloom filter based index. 
Bloom, /// Users may define their own index types, which would /// not be covered by the above variants. @@ -1192,7 +1473,7 @@ impl fmt::Display for IndexOption { /// [PostgreSQL] unique index nulls handling option: `[ NULLS [ NOT ] DISTINCT ]` /// /// [PostgreSQL]: https://www.postgresql.org/docs/17/sql-altertable.html -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum NullsDistinctOption { @@ -1217,10 +1498,15 @@ impl fmt::Display for NullsDistinctOption { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A parameter of a stored procedure or function declaration. pub struct ProcedureParam { + /// Parameter name. pub name: Ident, + /// Parameter data type. pub data_type: DataType, + /// Optional mode (`IN`, `OUT`, `INOUT`, etc.). pub mode: Option, + /// Optional default expression for the parameter. pub default: Option, } @@ -1245,8 +1531,11 @@ impl fmt::Display for ProcedureParam { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ColumnDef { + /// Column name. pub name: Ident, + /// Column data type. pub data_type: DataType, + /// Column options (defaults, constraints, generated, etc.). pub options: Vec, } @@ -1284,20 +1573,27 @@ impl fmt::Display for ColumnDef { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ViewColumnDef { + /// Column identifier. pub name: Ident, + /// Optional data type for the column. pub data_type: Option, + /// Optional column options (defaults, comments, etc.). 
pub options: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Representation of how multiple `ColumnOption`s are grouped for a column. pub enum ColumnOptions { + /// Options separated by comma: `OPTIONS(a, b, c)`. CommaSeparated(Vec), + /// Options separated by spaces: `OPTION_A OPTION_B`. SpaceSeparated(Vec), } impl ColumnOptions { + /// Get the column options as a slice. pub fn as_slice(&self) -> &[ColumnOption] { match self { ColumnOptions::CommaSeparated(options) => options.as_slice(), @@ -1346,7 +1642,9 @@ impl fmt::Display for ViewColumnDef { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ColumnOptionDef { + /// Optional name of the constraint. pub name: Option, + /// The actual column option (e.g. `NOT NULL`, `DEFAULT`, `GENERATED`, ...). pub option: ColumnOption, } @@ -1407,11 +1705,14 @@ impl fmt::Display for IdentityPropertyKind { } } +/// Properties for the `IDENTITY` / `AUTOINCREMENT` column option. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IdentityProperty { + /// Optional parameters specifying seed/increment for the identity column. pub parameters: Option, + /// Optional ordering specifier (`ORDER` / `NOORDER`). pub order: Option, } @@ -1466,11 +1767,14 @@ impl fmt::Display for IdentityPropertyFormatKind { } } } +/// Parameters specifying seed and increment for identity columns. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IdentityParameters { + /// The initial seed expression for the identity column. 
pub seed: Expr, + /// The increment expression for the identity column. pub increment: Expr, } @@ -1480,11 +1784,13 @@ pub struct IdentityParameters { /// ORDER | NOORDER /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IdentityPropertyOrder { + /// `ORDER` - preserve ordering for generated values (where supported). Order, + /// `NOORDER` - do not enforce ordering for generated values. NoOrder, } @@ -1508,7 +1814,9 @@ impl fmt::Display for IdentityPropertyOrder { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ColumnPolicy { + /// `MASKING POLICY ()` MaskingPolicy(ColumnPolicyProperty), + /// `PROJECTION POLICY ()` ProjectionPolicy(ColumnPolicyProperty), } @@ -1532,6 +1840,7 @@ impl fmt::Display for ColumnPolicy { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Properties describing a column policy (masking or projection). pub struct ColumnPolicyProperty { /// This flag indicates that the column policy option is declared using the `WITH` prefix. /// Example @@ -1540,7 +1849,9 @@ pub struct ColumnPolicyProperty { /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table pub with: bool, + /// The name of the policy to apply to the column. pub policy_name: ObjectName, + /// Optional list of column identifiers referenced by the policy. 
pub using_columns: Option>, } @@ -1561,6 +1872,7 @@ pub struct TagsColumnOption { /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table pub with: bool, + /// List of tags to attach to the column. pub tags: Vec, } @@ -1609,7 +1921,7 @@ pub enum ColumnOption { /// [ MATCH { FULL | PARTIAL | SIMPLE } ] /// { [ON DELETE ] [ON UPDATE ] | /// [ON UPDATE ] [ON DELETE ] - /// } + /// } /// [] /// `). ForeignKey(ForeignKeyConstraint), @@ -1619,16 +1931,24 @@ pub enum ColumnOption { /// - MySQL's `AUTO_INCREMENT` or SQLite's `AUTOINCREMENT` /// - ... DialectSpecific(Vec), + /// `CHARACTER SET ` column option CharacterSet(ObjectName), + /// `COLLATE ` column option Collation(ObjectName), + /// `COMMENT ''` column option Comment(String), + /// `ON UPDATE ` column option OnUpdate(Expr), /// `Generated`s are modifiers that follow a column definition in a `CREATE /// TABLE` statement. Generated { + /// How the column is generated (e.g. `GENERATED ALWAYS`, `BY DEFAULT`, or expression-stored). generated_as: GeneratedAs, + /// Sequence/identity options when generation is backed by a sequence. sequence_options: Option>, + /// Optional expression used to generate the column value. generation_expr: Option, + /// Mode of the generated expression (`VIRTUAL` or `STORED`) when `generation_expr` is present. 
generation_expr_mode: Option, /// false if 'GENERATED ALWAYS' is skipped (option starts with AS) generated_keyword: bool, @@ -1730,7 +2050,7 @@ impl fmt::Display for ColumnOption { Ok(()) } Unique(constraint) => { - write!(f, "UNIQUE")?; + write!(f, "UNIQUE{:>}", constraint.index_type_display)?; if let Some(characteristics) = &constraint.characteristics { write!(f, " {characteristics}")?; } @@ -1790,7 +2110,7 @@ impl fmt::Display for ColumnOption { GeneratedAs::Always => "ALWAYS", GeneratedAs::ByDefault => "BY DEFAULT", // ExpStored goes with an expression, handled above - GeneratedAs::ExpStored => unreachable!(), + GeneratedAs::ExpStored => "", }; write!(f, "GENERATED {when} AS IDENTITY")?; if sequence_options.is_some() { @@ -1836,22 +2156,27 @@ impl fmt::Display for ColumnOption { /// `GeneratedAs`s are modifiers that follow a column option in a `generated`. /// 'ExpStored' is used for a column generated from an expression and stored. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeneratedAs { + /// `GENERATED ALWAYS` Always, + /// `GENERATED BY DEFAULT` ByDefault, + /// Expression-based generated column that is stored (used internally for expression-stored columns) ExpStored, } /// `GeneratedExpressionMode`s are modifiers that follow an expression in a `generated`. /// No modifier is typically the same as Virtual. 
-#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeneratedExpressionMode { + /// `VIRTUAL` generated expression Virtual, + /// `STORED` generated expression Stored, } @@ -1914,6 +2239,7 @@ pub struct ConstraintCharacteristics { pub enforced: Option, } +/// Initial setting for deferrable constraints (`INITIALLY IMMEDIATE` or `INITIALLY DEFERRED`). #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -1985,10 +2311,15 @@ impl fmt::Display for ConstraintCharacteristics { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ReferentialAction { + /// `RESTRICT` - disallow action if it would break referential integrity. Restrict, + /// `CASCADE` - propagate the action to referencing rows. Cascade, + /// `SET NULL` - set referencing columns to NULL. SetNull, + /// `NO ACTION` - no action at the time; may be deferred. NoAction, + /// `SET DEFAULT` - set referencing columns to their default values. SetDefault, } @@ -2011,7 +2342,9 @@ impl fmt::Display for ReferentialAction { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DropBehavior { + /// `RESTRICT` - refuse to drop if there are any dependent objects. Restrict, + /// `CASCADE` - automatically drop objects that depend on the object being dropped. Cascade, } @@ -2031,16 +2364,22 @@ impl fmt::Display for DropBehavior { pub enum UserDefinedTypeRepresentation { /// Composite type: `CREATE TYPE name AS (attributes)` Composite { + /// List of attributes for the composite type. 
attributes: Vec, }, /// Enum type: `CREATE TYPE name AS ENUM (labels)` /// /// Note: this is PostgreSQL-specific. See - Enum { labels: Vec }, + /// Enum type: `CREATE TYPE name AS ENUM (labels)` + Enum { + /// Labels that make up the enum type. + labels: Vec, + }, /// Range type: `CREATE TYPE name AS RANGE (options)` /// /// Note: this is PostgreSQL-specific. See Range { + /// Options for the range type definition. options: Vec, }, /// Base type (SQL definition): `CREATE TYPE name (options)` @@ -2049,6 +2388,7 @@ pub enum UserDefinedTypeRepresentation { /// /// Note: this is PostgreSQL-specific. See SqlDefinition { + /// Options for SQL definition of the user-defined type. options: Vec, }, } @@ -2077,8 +2417,11 @@ impl fmt::Display for UserDefinedTypeRepresentation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UserDefinedTypeCompositeAttributeDef { + /// Attribute name. pub name: Ident, + /// Attribute data type. pub data_type: DataType, + /// Optional collation for the attribute. pub collation: Option, } @@ -2381,11 +2724,14 @@ impl fmt::Display for UserDefinedTypeSqlDefinitionOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Partition { + /// ClickHouse supports PARTITION ID 'partition_id' syntax. Identifier(Ident), + /// ClickHouse supports PARTITION expr syntax. Expr(Expr), /// ClickHouse supports PART expr which represents physical partition in disk. /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#attach-partitionpart) Part(Expr), + /// Hive supports multiple partitions in PARTITION (part1, part2, ...) syntax. 
Partitions(Vec), } @@ -2408,7 +2754,9 @@ impl fmt::Display for Partition { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Deduplicate { + /// DEDUPLICATE ALL All, + /// DEDUPLICATE BY expr ByExpression(Expr), } @@ -2429,8 +2777,11 @@ impl fmt::Display for Deduplicate { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ClusteredBy { + /// columns used for clustering pub columns: Vec, + /// optional sorted by expressions pub sorted_by: Option>, + /// number of buckets pub num_buckets: Value, } @@ -2456,19 +2807,28 @@ pub struct CreateIndex { /// index name pub name: Option, #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + /// table name pub table_name: ObjectName, /// Index type used in the statement. Can also be found inside [`CreateIndex::index_options`] /// depending on the position of the option within the statement. 
pub using: Option, + /// columns included in the index pub columns: Vec, + /// whether the index is unique pub unique: bool, + /// whether the index is created concurrently pub concurrently: bool, + /// IF NOT EXISTS clause pub if_not_exists: bool, + /// INCLUDE clause: pub include: Vec, + /// NULLS DISTINCT / NOT DISTINCT clause: pub nulls_distinct: Option, /// WITH clause: pub with: Vec, + /// WHERE clause: pub predicate: Option, + /// Index options: pub index_options: Vec, /// [MySQL] allows a subset of options normally used for `ALTER TABLE`: /// @@ -2535,35 +2895,60 @@ impl fmt::Display for CreateIndex { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateTable { + /// `OR REPLACE` clause pub or_replace: bool, + /// `TEMP` or `TEMPORARY` clause pub temporary: bool, + /// `EXTERNAL` clause pub external: bool, + /// `DYNAMIC` clause pub dynamic: bool, + /// `GLOBAL` clause pub global: Option, + /// `IF NOT EXISTS` clause pub if_not_exists: bool, + /// `TRANSIENT` clause pub transient: bool, + /// `VOLATILE` clause pub volatile: bool, + /// `ICEBERG` clause pub iceberg: bool, + /// `SNAPSHOT` clause + /// + pub snapshot: bool, /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, - /// Optional schema + /// Column definitions pub columns: Vec, + /// Table constraints pub constraints: Vec, + /// Hive-specific distribution style pub hive_distribution: HiveDistributionStyle, + /// Hive-specific formats like `ROW FORMAT DELIMITED` or `ROW FORMAT SERDE 'serde_class' WITH SERDEPROPERTIES (...)` pub hive_formats: Option, + /// Table options pub table_options: CreateTableOptions, + /// File format of the table data (`STORED AS` clause) pub file_format: Option, + /// Location of the table data pub location: Option, + /// Query used to populate the table pub query: Option>, + /// If the table should be created without a rowid (SQLite) pub without_rowid: bool, +
/// `LIKE` clause pub like: Option, + /// `CLONE` clause pub clone: Option, + /// Table version (for systems that support versioned tables) pub version: Option, - // For Hive dialect, the table comment is after the column definitions without `=`, - // so the `comment` field is optional and different than the comment field in the general options list. - // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) + /// For Hive dialect, the table comment is after the column definitions without `=`, + /// so the `comment` field is optional and different than the comment field in the general options list. + /// [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) pub comment: Option, + /// ClickHouse "ON COMMIT" clause: + /// pub on_commit: Option, /// ClickHouse "ON CLUSTER" clause: /// @@ -2591,6 +2976,14 @@ pub struct CreateTable { /// /// pub inherits: Option>, + /// PostgreSQL `PARTITION OF` clause to create a partition of a parent table. + /// Contains the parent table name. + /// + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub partition_of: Option, + /// PostgreSQL partition bound specification for PARTITION OF. + /// + pub for_values: Option, /// SQLite "STRICT" clause. /// if the "STRICT" table-option keyword is added to the end, after the closing ")", /// then strict typing rules apply to that table. 
@@ -2619,6 +3012,9 @@ pub struct CreateTable { /// Snowflake "WITH ROW ACCESS POLICY" clause /// pub with_row_access_policy: Option, + /// Snowflake `WITH STORAGE LIFECYCLE POLICY` clause + /// + pub with_storage_lifecycle_policy: Option, /// Snowflake "WITH TAG" clause /// pub with_tags: Option>, @@ -2652,6 +3048,18 @@ pub struct CreateTable { /// Snowflake "REQUIRE USER" clause for dybamic tables /// pub require_user: bool, + /// Redshift `DISTSTYLE` option + /// + pub diststyle: Option, + /// Redshift `DISTKEY` option + /// + pub distkey: Option, + /// Redshift `SORTKEY` option + /// + pub sortkey: Option>, + /// Redshift `BACKUP` option: `BACKUP { YES | NO }` + /// + pub backup: Option, } impl fmt::Display for CreateTable { @@ -2665,9 +3073,10 @@ impl fmt::Display for CreateTable { // `CREATE TABLE t (a INT) AS SELECT a from t2` write!( f, - "CREATE {or_replace}{external}{global}{temporary}{transient}{volatile}{dynamic}{iceberg}TABLE {if_not_exists}{name}", + "CREATE {or_replace}{external}{global}{temporary}{transient}{volatile}{dynamic}{iceberg}{snapshot}TABLE {if_not_exists}{name}", or_replace = if self.or_replace { "OR REPLACE " } else { "" }, external = if self.external { "EXTERNAL " } else { "" }, + snapshot = if self.snapshot { "SNAPSHOT " } else { "" }, global = self.global .map(|global| { if global { @@ -2686,6 +3095,9 @@ impl fmt::Display for CreateTable { dynamic = if self.dynamic { "DYNAMIC " } else { "" }, name = self.name, )?; + if let Some(partition_of) = &self.partition_of { + write!(f, " PARTITION OF {partition_of}")?; + } if let Some(on_cluster) = &self.on_cluster { write!(f, " ON CLUSTER {on_cluster}")?; } @@ -2700,12 +3112,19 @@ impl fmt::Display for CreateTable { Indent(DisplayCommaSeparated(&self.constraints)).fmt(f)?; NewLine.fmt(f)?; f.write_str(")")?; - } else if self.query.is_none() && self.like.is_none() && self.clone.is_none() { + } else if self.query.is_none() + && self.like.is_none() + && self.clone.is_none() + && 
self.partition_of.is_none() + { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens f.write_str(" ()")?; } else if let Some(CreateTableLikeKind::Parenthesized(like_in_columns_list)) = &self.like { write!(f, " ({like_in_columns_list})")?; } + if let Some(for_values) = &self.for_values { + write!(f, " {for_values}")?; + } // Hive table comment should be after column definitions, please refer to: // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) @@ -2803,7 +3222,9 @@ impl fmt::Display for CreateTable { if let Some(file_format) = self.file_format { write!(f, " STORED AS {file_format}")?; } - write!(f, " LOCATION '{}'", self.location.as_ref().unwrap())?; + if let Some(location) = &self.location { + write!(f, " LOCATION '{location}'")?; + } } match &self.table_options { @@ -2902,6 +3323,10 @@ impl fmt::Display for CreateTable { write!(f, " {row_access_policy}",)?; } + if let Some(storage_lifecycle_policy) = &self.with_storage_lifecycle_policy { + write!(f, " {storage_lifecycle_policy}",)?; + } + if let Some(tag) = &self.with_tags { write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; } @@ -2938,6 +3363,18 @@ impl fmt::Display for CreateTable { if self.strict { write!(f, " STRICT")?; } + if let Some(backup) = self.backup { + write!(f, " BACKUP {}", if backup { "YES" } else { "NO" })?; + } + if let Some(diststyle) = &self.diststyle { + write!(f, " DISTSTYLE {diststyle}")?; + } + if let Some(distkey) = &self.distkey { + write!(f, " DISTKEY({distkey})")?; + } + if let Some(sortkey) = &self.sortkey { + write!(f, " SORTKEY({})", display_comma_separated(sortkey))?; + } if let Some(query) = &self.query { write!(f, " AS {query}")?; } @@ -2945,6 +3382,114 @@ impl fmt::Display for CreateTable { } } +/// PostgreSQL partition bound specification for `PARTITION OF`. +/// +/// Specifies partition bounds for a child partition table. 
+/// +/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createtable.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ForValues { + /// `FOR VALUES IN (expr, ...)` + In(Vec), + /// `FOR VALUES FROM (expr|MINVALUE|MAXVALUE, ...) TO (expr|MINVALUE|MAXVALUE, ...)` + From { + /// The lower bound values for the partition. + from: Vec, + /// The upper bound values for the partition. + to: Vec, + }, + /// `FOR VALUES WITH (MODULUS n, REMAINDER r)` + With { + /// The modulus value for hash partitioning. + modulus: u64, + /// The remainder value for hash partitioning. + remainder: u64, + }, + /// `DEFAULT` + Default, +} + +impl fmt::Display for ForValues { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ForValues::In(values) => { + write!(f, "FOR VALUES IN ({})", display_comma_separated(values)) + } + ForValues::From { from, to } => { + write!( + f, + "FOR VALUES FROM ({}) TO ({})", + display_comma_separated(from), + display_comma_separated(to) + ) + } + ForValues::With { modulus, remainder } => { + write!( + f, + "FOR VALUES WITH (MODULUS {modulus}, REMAINDER {remainder})" + ) + } + ForValues::Default => write!(f, "DEFAULT"), + } + } +} + +/// A value in a partition bound specification. +/// +/// Used in RANGE partition bounds where values can be expressions, +/// MINVALUE (negative infinity), or MAXVALUE (positive infinity). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum PartitionBoundValue { + /// An expression representing a partition bound value. + Expr(Expr), + /// Represents negative infinity in partition bounds. + MinValue, + /// Represents positive infinity in partition bounds. 
+ MaxValue, +} + +impl fmt::Display for PartitionBoundValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PartitionBoundValue::Expr(expr) => write!(f, "{expr}"), + PartitionBoundValue::MinValue => write!(f, "MINVALUE"), + PartitionBoundValue::MaxValue => write!(f, "MAXVALUE"), + } + } +} + +/// Redshift distribution style for `CREATE TABLE`. +/// +/// See [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_TABLE_NEW.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum DistStyle { + /// `DISTSTYLE AUTO` + Auto, + /// `DISTSTYLE EVEN` + Even, + /// `DISTSTYLE KEY` + Key, + /// `DISTSTYLE ALL` + All, +} + +impl fmt::Display for DistStyle { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DistStyle::Auto => write!(f, "AUTO"), + DistStyle::Even => write!(f, "EVEN"), + DistStyle::Key => write!(f, "KEY"), + DistStyle::All => write!(f, "ALL"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -2994,20 +3539,49 @@ impl fmt::Display for CreateDomain { } } +/// The return type of a `CREATE FUNCTION` statement. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionReturnType { + /// `RETURNS ` + DataType(DataType), + /// `RETURNS SETOF ` + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) + SetOf(DataType), +} + +impl fmt::Display for FunctionReturnType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FunctionReturnType::DataType(data_type) => write!(f, "{data_type}"), + FunctionReturnType::SetOf(data_type) => write!(f, "SETOF {data_type}"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// CREATE FUNCTION statement pub struct CreateFunction { /// True if this is a `CREATE OR ALTER FUNCTION` statement /// /// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql?view=sql-server-ver16#or-alter) pub or_alter: bool, + /// True if this is a `CREATE OR REPLACE FUNCTION` statement pub or_replace: bool, + /// True if this is a `CREATE TEMPORARY FUNCTION` statement pub temporary: bool, + /// True if this is a `CREATE IF NOT EXISTS FUNCTION` statement pub if_not_exists: bool, + /// Name of the function to be created. pub name: ObjectName, + /// List of arguments for the function. pub args: Option>, - pub return_type: Option, + /// The return type of the function. + pub return_type: Option, /// The expression that defines the function. 
/// /// Examples: @@ -3030,6 +3604,14 @@ pub struct CreateFunction { /// /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) pub parallel: Option, + /// SECURITY { DEFINER | INVOKER } + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) + pub security: Option, + /// SET configuration_parameter clauses + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) + pub set_params: Vec, /// USING ... (Hive only) pub using: Option, /// Language used in a UDF definition. @@ -3096,6 +3678,12 @@ impl fmt::Display for CreateFunction { if let Some(parallel) = &self.parallel { write!(f, " {parallel}")?; } + if let Some(security) = &self.security { + write!(f, " {security}")?; + } + for set_param in &self.set_params { + write!(f, " {set_param}")?; + } if let Some(remote_connection) = &self.remote_connection { write!(f, " REMOTE WITH CONNECTION {remote_connection}")?; } @@ -3148,11 +3736,17 @@ impl fmt::Display for CreateFunction { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateConnector { + /// The name of the connector to be created. pub name: Ident, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// The type of the connector. pub connector_type: Option, + /// The URL of the connector. pub url: Option, + /// The comment for the connector. pub comment: Option, + /// The DC properties for the connector. pub with_dcproperties: Option>, } @@ -3201,23 +3795,36 @@ impl fmt::Display for CreateConnector { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterSchemaOperation { + /// Set the default collation for the schema. SetDefaultCollate { + /// The collation to set as default. collate: Expr, }, + /// Add a replica to the schema. AddReplica { + /// The replica to add. 
replica: Ident, + /// Optional options for the replica. options: Option>, }, + /// Drop a replica from the schema. DropReplica { + /// The replica to drop. replica: Ident, }, + /// Set options for the schema. SetOptionsParens { + /// The options to set. options: Vec, }, + /// Rename the schema. Rename { + /// The new name for the schema. name: ObjectName, }, + /// Change the owner of the schema. OwnerTo { + /// The new owner of the schema. owner: Owner, }, } @@ -3253,7 +3860,9 @@ impl fmt::Display for AlterSchemaOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum RenameTableNameKind { + /// `AS new_table_name` As(ObjectName), + /// `TO new_table_name` To(ObjectName), } @@ -3269,9 +3878,13 @@ impl fmt::Display for RenameTableNameKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An `ALTER SCHEMA` (`Statement::AlterSchema`) statement. pub struct AlterSchema { + /// The schema name to alter. pub name: ObjectName, + /// Whether `IF EXISTS` was specified. pub if_exists: bool, + /// The list of operations to perform on the schema. pub operations: Vec, } @@ -3299,7 +3912,7 @@ impl Spanned for RenameTableNameKind { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Whether the syntax used for the trigger object (ROW or STATEMENT) is `FOR` or `FOR EACH`. @@ -3562,7 +4175,7 @@ impl fmt::Display for DropTrigger { /// A `TRUNCATE` statement. 
/// /// ```sql -/// TRUNCATE TABLE table_names [PARTITION (partitions)] [RESTART IDENTITY | CONTINUE IDENTITY] [CASCADE | RESTRICT] [ON CLUSTER cluster_name] +/// TRUNCATE TABLE [IF EXISTS] table_names [PARTITION (partitions)] [RESTART IDENTITY | CONTINUE IDENTITY] [CASCADE | RESTRICT] [ON CLUSTER cluster_name] /// ``` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -3574,6 +4187,8 @@ pub struct Truncate { pub partitions: Option>, /// TABLE - optional keyword pub table: bool, + /// Snowflake/Redshift-specific option: [ IF EXISTS ] + pub if_exists: bool, /// Postgres-specific option: [ RESTART IDENTITY | CONTINUE IDENTITY ] pub identity: Option, /// Postgres-specific option: [ CASCADE | RESTRICT ] @@ -3586,10 +4201,11 @@ pub struct Truncate { impl fmt::Display for Truncate { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let table = if self.table { "TABLE " } else { "" }; + let if_exists = if self.if_exists { "IF EXISTS " } else { "" }; write!( f, - "TRUNCATE {table}{table_names}", + "TRUNCATE {table}{if_exists}{table_names}", table_names = display_comma_separated(&self.table_names) )?; @@ -3679,7 +4295,9 @@ pub struct CreateView { /// /// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-view-transact-sql) pub or_alter: bool, + /// The `OR REPLACE` clause is used to re-create the view if it already exists. pub or_replace: bool, + /// if true, has MATERIALIZED view modifier pub materialized: bool, /// Snowflake: SECURE view modifier /// @@ -3697,9 +4315,13 @@ pub struct CreateView { /// CREATE VIEW IF NOT EXISTS myview AS SELECT 1` /// ``` pub name_before_not_exists: bool, + /// Optional column definitions pub columns: Vec, + /// The query that defines the view. 
pub query: Box, + /// Table options (e.g., WITH (..), OPTIONS (...)) pub options: CreateTableOptions, + /// BigQuery: CLUSTER BY columns pub cluster_by: Vec, /// Snowflake: Views can have comments in Snowflake. /// @@ -3710,6 +4332,9 @@ pub struct CreateView { pub if_not_exists: bool, /// if true, has SQLite `TEMP` or `TEMPORARY` clause pub temporary: bool, + /// Snowflake: `COPY GRANTS` clause + /// + pub copy_grants: bool, /// if not None, has Clickhouse `TO` clause, specify the table into which to insert results /// pub to: Option, @@ -3753,6 +4378,9 @@ impl fmt::Display for CreateView { .map(|to| format!(" TO {to}")) .unwrap_or_default() )?; + if self.copy_grants { + write!(f, " COPY GRANTS")?; + } if !self.columns.is_empty() { write!(f, " ({})", display_comma_separated(&self.columns))?; } @@ -3788,10 +4416,15 @@ impl fmt::Display for CreateView { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateExtension { + /// Extension name pub name: Ident, + /// Whether `IF NOT EXISTS` was specified for the CREATE EXTENSION. pub if_not_exists: bool, + /// Whether `CASCADE` was specified for the CREATE EXTENSION. pub cascade: bool, + /// Optional schema name for the extension. pub schema: Option, + /// Optional version for the extension. pub version: Option, } @@ -3831,7 +4464,7 @@ impl Spanned for CreateExtension { } } -/// DROP EXTENSION statement +/// DROP EXTENSION statement /// Note: this is a PostgreSQL-specific statement /// /// # References @@ -3842,9 +4475,11 @@ impl Spanned for CreateExtension { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DropExtension { + /// One or more extension names to drop pub names: Vec, + /// Whether `IF EXISTS` was specified for the DROP EXTENSION. pub if_exists: bool, - /// `CASCADE` or `RESTRICT` + /// `CASCADE` or `RESTRICT` behaviour for the drop. 
pub cascade_or_restrict: Option, } @@ -3878,8 +4513,11 @@ pub enum AlterTableType { /// Iceberg, /// Dynamic table type - /// + /// Dynamic, + /// External table type + /// + External, } /// ALTER TABLE statement @@ -3890,9 +4528,13 @@ pub struct AlterTable { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, + /// Whether `IF EXISTS` was specified for the `ALTER TABLE`. pub if_exists: bool, + /// Whether the `ONLY` keyword was used (restrict scope to the named table). pub only: bool, + /// List of `ALTER TABLE` operations to apply. pub operations: Vec, + /// Optional Hive `SET LOCATION` clause for the alter operation. pub location: Option, /// ClickHouse dialect supports `ON CLUSTER` clause for ALTER TABLE /// For example: `ALTER TABLE table_name ON CLUSTER cluster_name ADD COLUMN c UInt32` @@ -3909,6 +4551,7 @@ impl fmt::Display for AlterTable { match &self.table_type { Some(AlterTableType::Iceberg) => write!(f, "ALTER ICEBERG TABLE ")?, Some(AlterTableType::Dynamic) => write!(f, "ALTER DYNAMIC TABLE ")?, + Some(AlterTableType::External) => write!(f, "ALTER EXTERNAL TABLE ")?, None => write!(f, "ALTER TABLE ")?, } @@ -3935,6 +4578,7 @@ impl fmt::Display for AlterTable { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DropFunction { + /// Whether to include the `IF EXISTS` clause. 
pub if_exists: bool, /// One or more functions to drop pub func_desc: Vec, @@ -3979,18 +4623,8 @@ pub struct CreateOperator { pub left_arg: Option, /// RIGHTARG parameter (right operand type) pub right_arg: Option, - /// COMMUTATOR parameter (commutator operator) - pub commutator: Option, - /// NEGATOR parameter (negator operator) - pub negator: Option, - /// RESTRICT parameter (restriction selectivity function) - pub restrict: Option, - /// JOIN parameter (join selectivity function) - pub join: Option, - /// HASHES flag - pub hashes: bool, - /// MERGES flag - pub merges: bool, + /// Operator options (COMMUTATOR, NEGATOR, RESTRICT, JOIN, HASHES, MERGES) + pub options: Vec, } /// CREATE OPERATOR FAMILY statement @@ -4042,23 +4676,9 @@ impl fmt::Display for CreateOperator { if let Some(right_arg) = &self.right_arg { params.push(format!("RIGHTARG = {}", right_arg)); } - if let Some(commutator) = &self.commutator { - params.push(format!("COMMUTATOR = {}", commutator)); - } - if let Some(negator) = &self.negator { - params.push(format!("NEGATOR = {}", negator)); - } - if let Some(restrict) = &self.restrict { - params.push(format!("RESTRICT = {}", restrict)); - } - if let Some(join) = &self.join { - params.push(format!("JOIN = {}", join)); - } - if self.hashes { - params.push("HASHES".to_string()); - } - if self.merges { - params.push("MERGES".to_string()); + + for option in &self.options { + params.push(option.to_string()); } write!(f, "{}", params.join(", "))?; @@ -4095,7 +4715,9 @@ impl fmt::Display for CreateOperatorClass { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct OperatorArgTypes { + /// Left-hand operand data type for the operator. pub left: DataType, + /// Right-hand operand data type for the operator. 
pub right: DataType, } @@ -4110,26 +4732,33 @@ impl fmt::Display for OperatorArgTypes { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorClassItem { - /// OPERATOR clause + /// `OPERATOR` clause describing a specific operator implementation. Operator { - strategy_number: u32, + /// Strategy number identifying the operator position in the opclass. + strategy_number: u64, + /// The operator name referenced by this clause. operator_name: ObjectName, - /// Optional operator argument types + /// Optional operator argument types. op_types: Option, - /// FOR SEARCH or FOR ORDER BY + /// Optional purpose such as `FOR SEARCH` or `FOR ORDER BY`. purpose: Option, }, - /// FUNCTION clause + /// `FUNCTION` clause describing a support function for the operator class. Function { - support_number: u32, - /// Optional function argument types for the operator class + /// Support function number for this entry. + support_number: u64, + /// Optional function argument types for the operator class. op_types: Option>, + /// The function name implementing the support function. function_name: ObjectName, - /// Function argument types + /// Function argument types for the support function. argument_types: Vec, }, - /// STORAGE clause - Storage { storage_type: DataType }, + /// `STORAGE` clause specifying the storage type. + Storage { + /// The storage data type. + storage_type: DataType, + }, } /// Purpose of an operator in an operator class @@ -4137,8 +4766,13 @@ pub enum OperatorClassItem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorPurpose { + /// Purpose: used for index/search operations. ForSearch, - ForOrderBy { sort_family: ObjectName }, + /// Purpose: used for ORDER BY; optionally includes a sort family name. + ForOrderBy { + /// Optional sort family object name. 
+ sort_family: ObjectName, + }, } impl fmt::Display for OperatorClassItem { @@ -4325,3 +4959,456 @@ impl Spanned for DropOperatorClass { Span::empty() } } + +/// An item in an ALTER OPERATOR FAMILY ADD statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyItem { + /// `OPERATOR` clause in an operator family modification. + Operator { + /// Strategy number for the operator. + strategy_number: u64, + /// Operator name referenced by this entry. + operator_name: ObjectName, + /// Operator argument types. + op_types: Vec, + /// Optional purpose such as `FOR SEARCH` or `FOR ORDER BY`. + purpose: Option, + }, + /// `FUNCTION` clause in an operator family modification. + Function { + /// Support function number. + support_number: u64, + /// Optional operator argument types for the function. + op_types: Option>, + /// Function name for the support function. + function_name: ObjectName, + /// Function argument types. + argument_types: Vec, + }, +} + +/// An item in an ALTER OPERATOR FAMILY DROP statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyDropItem { + /// `OPERATOR` clause for DROP within an operator family. + Operator { + /// Strategy number for the operator. + strategy_number: u64, + /// Operator argument types. + op_types: Vec, + }, + /// `FUNCTION` clause for DROP within an operator family. + Function { + /// Support function number. + support_number: u64, + /// Operator argument types for the function. 
+ op_types: Vec, + }, +} + +impl fmt::Display for OperatorFamilyItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + } => { + write!( + f, + "OPERATOR {strategy_number} {operator_name} ({})", + display_comma_separated(op_types) + )?; + if let Some(purpose) = purpose { + write!(f, " {purpose}")?; + } + Ok(()) + } + OperatorFamilyItem::Function { + support_number, + op_types, + function_name, + argument_types, + } => { + write!(f, "FUNCTION {support_number}")?; + if let Some(types) = op_types { + write!(f, " ({})", display_comma_separated(types))?; + } + write!(f, " {function_name}")?; + if !argument_types.is_empty() { + write!(f, "({})", display_comma_separated(argument_types))?; + } + Ok(()) + } + } + } +} + +impl fmt::Display for OperatorFamilyDropItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + } => { + write!( + f, + "OPERATOR {strategy_number} ({})", + display_comma_separated(op_types) + ) + } + OperatorFamilyDropItem::Function { + support_number, + op_types, + } => { + write!( + f, + "FUNCTION {support_number} ({})", + display_comma_separated(op_types) + ) + } + } + } +} + +/// `ALTER OPERATOR FAMILY` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperatorFamily { + /// Operator family name (can be schema-qualified) + pub name: ObjectName, + /// Index method (btree, hash, gist, gin, etc.) 
+ pub using: Ident, + /// The operation to perform + pub operation: AlterOperatorFamilyOperation, +} + +/// An [AlterOperatorFamily] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorFamilyOperation { + /// `ADD { OPERATOR ... | FUNCTION ... } [, ...]` + Add { + /// List of operator family items to add + items: Vec, + }, + /// `DROP { OPERATOR ... | FUNCTION ... } [, ...]` + Drop { + /// List of operator family items to drop + items: Vec, + }, + /// `RENAME TO new_name` + RenameTo { + /// The new name for the operator family. + new_name: ObjectName, + }, + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + SetSchema { + /// The target schema name. + schema_name: ObjectName, + }, +} + +impl fmt::Display for AlterOperatorFamily { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ALTER OPERATOR FAMILY {} USING {}", + self.name, self.using + )?; + write!(f, " {}", self.operation) + } +} + +impl fmt::Display for AlterOperatorFamilyOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterOperatorFamilyOperation::Add { items } => { + write!(f, "ADD {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::Drop { items } => { + write!(f, "DROP {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::RenameTo { new_name } => { + write!(f, "RENAME TO {new_name}") + } + AlterOperatorFamilyOperation::OwnerTo(owner) => { + write!(f, "OWNER TO {owner}") + } + AlterOperatorFamilyOperation::SetSchema { schema_name } => { + write!(f, "SET SCHEMA {schema_name}") + } + } + } +} + +impl Spanned for AlterOperatorFamily { + fn span(&self) -> Span { + Span::empty() + } +} + +/// `ALTER OPERATOR CLASS` statement +/// See +#[derive(Debug, Clone, PartialEq, 
PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperatorClass { + /// Operator class name (can be schema-qualified) + pub name: ObjectName, + /// Index method (btree, hash, gist, gin, etc.) + pub using: Ident, + /// The operation to perform + pub operation: AlterOperatorClassOperation, +} + +/// An [AlterOperatorClass] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorClassOperation { + /// `RENAME TO new_name` + /// Rename the operator class to a new name. + RenameTo { + /// The new name for the operator class. + new_name: ObjectName, + }, + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + /// Set the schema for the operator class. + SetSchema { + /// The target schema name. + schema_name: ObjectName, + }, +} + +impl fmt::Display for AlterOperatorClass { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ALTER OPERATOR CLASS {} USING {}", self.name, self.using)?; + write!(f, " {}", self.operation) + } +} + +impl fmt::Display for AlterOperatorClassOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterOperatorClassOperation::RenameTo { new_name } => { + write!(f, "RENAME TO {new_name}") + } + AlterOperatorClassOperation::OwnerTo(owner) => { + write!(f, "OWNER TO {owner}") + } + AlterOperatorClassOperation::SetSchema { schema_name } => { + write!(f, "SET SCHEMA {schema_name}") + } + } + } +} + +impl Spanned for AlterOperatorClass { + fn span(&self) -> Span { + Span::empty() + } +} + +/// CREATE POLICY statement. 
+/// +/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreatePolicy { + /// Name of the policy. + pub name: Ident, + /// Table the policy is defined on. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + /// Optional policy type (e.g., `PERMISSIVE` / `RESTRICTIVE`). + pub policy_type: Option, + /// Optional command the policy applies to (e.g., `SELECT`). + pub command: Option, + /// Optional list of grantee owners. + pub to: Option>, + /// Optional expression for the `USING` clause. + pub using: Option, + /// Optional expression for the `WITH CHECK` clause. + pub with_check: Option, +} + +impl fmt::Display for CreatePolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CREATE POLICY {name} ON {table_name}", + name = self.name, + table_name = self.table_name, + )?; + if let Some(ref policy_type) = self.policy_type { + write!(f, " AS {policy_type}")?; + } + if let Some(ref command) = self.command { + write!(f, " FOR {command}")?; + } + if let Some(ref to) = self.to { + write!(f, " TO {}", display_comma_separated(to))?; + } + if let Some(ref using) = self.using { + write!(f, " USING ({using})")?; + } + if let Some(ref with_check) = self.with_check { + write!(f, " WITH CHECK ({with_check})")?; + } + Ok(()) + } +} + +/// Policy type for a `CREATE POLICY` statement. 
+/// ```sql +/// AS [ PERMISSIVE | RESTRICTIVE ] +/// ``` +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CreatePolicyType { + /// Policy allows operations unless explicitly denied. + Permissive, + /// Policy denies operations unless explicitly allowed. + Restrictive, +} + +impl fmt::Display for CreatePolicyType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CreatePolicyType::Permissive => write!(f, "PERMISSIVE"), + CreatePolicyType::Restrictive => write!(f, "RESTRICTIVE"), + } + } +} + +/// Command that a policy can apply to (FOR clause). +/// ```sql +/// FOR [ALL | SELECT | INSERT | UPDATE | DELETE] +/// ``` +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CreatePolicyCommand { + /// Applies to all commands. + All, + /// Applies to SELECT. + Select, + /// Applies to INSERT. + Insert, + /// Applies to UPDATE. + Update, + /// Applies to DELETE. + Delete, +} + +impl fmt::Display for CreatePolicyCommand { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CreatePolicyCommand::All => write!(f, "ALL"), + CreatePolicyCommand::Select => write!(f, "SELECT"), + CreatePolicyCommand::Insert => write!(f, "INSERT"), + CreatePolicyCommand::Update => write!(f, "UPDATE"), + CreatePolicyCommand::Delete => write!(f, "DELETE"), + } + } +} + +/// DROP POLICY statement. 
+/// +/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-droppolicy.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct DropPolicy { + /// `true` when `IF EXISTS` was present. + pub if_exists: bool, + /// Name of the policy to drop. + pub name: Ident, + /// Name of the table the policy applies to. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + /// Optional drop behavior (`CASCADE` or `RESTRICT`). + pub drop_behavior: Option, +} + +impl fmt::Display for DropPolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "DROP POLICY {if_exists}{name} ON {table_name}", + if_exists = if self.if_exists { "IF EXISTS " } else { "" }, + name = self.name, + table_name = self.table_name + )?; + if let Some(ref behavior) = self.drop_behavior { + write!(f, " {behavior}")?; + } + Ok(()) + } +} + +impl From for crate::ast::Statement { + fn from(v: CreatePolicy) -> Self { + crate::ast::Statement::CreatePolicy(v) + } +} + +impl From for crate::ast::Statement { + fn from(v: DropPolicy) -> Self { + crate::ast::Statement::DropPolicy(v) + } +} + +/// ALTER POLICY statement. +/// +/// ```sql +/// ALTER POLICY ON [] +/// ``` +/// (Postgresql-specific) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterPolicy { + /// Policy name to alter. + pub name: Ident, + /// Target table name the policy is defined on. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + /// Optional operation specific to the policy alteration. 
+ pub operation: AlterPolicyOperation, +} + +impl fmt::Display for AlterPolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ALTER POLICY {name} ON {table_name}{operation}", + name = self.name, + table_name = self.table_name, + operation = self.operation + ) + } +} + +impl From for crate::ast::Statement { + fn from(v: AlterPolicy) -> Self { + crate::ast::Statement::AlterPolicy(v) + } +} diff --git a/src/ast/dml.rs b/src/ast/dml.rs index d6009ce8ab..446d44b205 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -24,13 +24,17 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::display_utils::{indented_list, Indent, SpaceOrNewline}; +use crate::{ + ast::display_separated, + display_utils::{indented_list, Indent, SpaceOrNewline}, +}; use super::{ display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause, Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, - OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins, - UpdateTableFromKind, + OptimizerHint, OrderByExpr, Query, SelectInto, SelectItem, Setting, SqliteOnConflict, + TableAliasWithoutColumns, TableFactor, TableObject, TableWithJoins, UpdateTableFromKind, + Values, }; /// INSERT statement. 
@@ -40,6 +44,11 @@ use super::{ pub struct Insert { /// Token for the `INSERT` keyword (or its substitutes) pub insert_token: AttachedToken, + /// Query optimizer hints + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hints: Vec, /// Only for Sqlite pub or: Option, /// Only for mysql @@ -48,10 +57,11 @@ pub struct Insert { pub into: bool, /// TABLE pub table: TableObject, - /// table_name as foo (for PostgreSQL) - pub table_alias: Option, + /// `table_name as foo` (for PostgreSQL) + /// `table_name foo` (for Oracle) + pub table_alias: Option, /// COLUMNS - pub columns: Vec, + pub columns: Vec, /// Overwrite (Hive) pub overwrite: bool, /// A SQL query that specifies what to insert @@ -65,9 +75,13 @@ pub struct Insert { pub after_columns: Vec, /// whether the insert has the table keyword (Hive) pub has_table_keyword: bool, + /// ON INSERT pub on: Option, /// RETURNING pub returning: Option>, + /// OUTPUT (MSSQL) + /// See + pub output: Option, /// Only for mysql pub replace_into: bool, /// Only for mysql @@ -87,18 +101,52 @@ pub struct Insert { /// /// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data) pub format_clause: Option, + /// For Snowflake multi-table insert: specifies the type (`ALL` or `FIRST`) + /// + /// - `None` means this is a regular single-table INSERT + /// - `Some(All)` means `INSERT ALL` (all matching WHEN clauses are executed) + /// - `Some(First)` means `INSERT FIRST` (only the first matching WHEN clause is executed) + /// + /// See: + pub multi_table_insert_type: Option, + /// For multi-table insert: additional INTO clauses (unconditional) + /// + /// Used for `INSERT ALL INTO t1 INTO t2 ... 
SELECT ...` + /// + /// See: + pub multi_table_into_clauses: Vec, + /// For conditional multi-table insert: WHEN clauses + /// + /// Used for `INSERT ALL/FIRST WHEN cond THEN INTO t1 ... SELECT ...` + /// + /// See: + pub multi_table_when_clauses: Vec, + /// For conditional multi-table insert: ELSE clause + /// + /// See: + pub multi_table_else_clause: Option>, } impl Display for Insert { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let table_name = if let Some(alias) = &self.table_alias { - format!("{0} AS {alias}", self.table) + // SQLite OR conflict has a special format: INSERT OR ... INTO table_name + let table_name = if let Some(table_alias) = &self.table_alias { + format!( + "{table} {as_keyword}{alias}", + table = self.table, + as_keyword = if table_alias.explicit { "AS " } else { "" }, + alias = table_alias.alias + ) } else { self.table.to_string() }; if let Some(on_conflict) = self.or { - write!(f, "INSERT {on_conflict} INTO {table_name} ")?; + f.write_str("INSERT")?; + for hint in &self.optimizer_hints { + write!(f, " {hint}")?; + } + write!(f, " {on_conflict} INTO {table_name} ")?; } else { write!( f, @@ -107,43 +155,92 @@ impl Display for Insert { "REPLACE" } else { "INSERT" - }, + } )?; + for hint in &self.optimizer_hints { + write!(f, " {hint}")?; + } if let Some(priority) = self.priority { - write!(f, " {priority}",)?; + write!(f, " {priority}")?; } - write!( - f, - "{ignore}{over}{int}{tbl} {table_name} ", - table_name = table_name, - ignore = if self.ignore { " IGNORE" } else { "" }, - over = if self.overwrite { " OVERWRITE" } else { "" }, - int = if self.into { " INTO" } else { "" }, - tbl = if self.has_table_keyword { " TABLE" } else { "" }, - )?; + if self.ignore { + write!(f, " IGNORE")?; + } + + if self.overwrite { + write!(f, " OVERWRITE")?; + } + + if let Some(insert_type) = &self.multi_table_insert_type { + write!(f, " {}", insert_type)?; + } + + if self.into { + write!(f, " INTO")?; + } + + if self.has_table_keyword { + 
write!(f, " TABLE")?; + } + + if !table_name.is_empty() { + write!(f, " {table_name} ")?; + } } + if !self.columns.is_empty() { write!(f, "({})", display_comma_separated(&self.columns))?; SpaceOrNewline.fmt(f)?; } + if let Some(ref parts) = self.partitioned { if !parts.is_empty() { write!(f, "PARTITION ({})", display_comma_separated(parts))?; SpaceOrNewline.fmt(f)?; } } + if !self.after_columns.is_empty() { write!(f, "({})", display_comma_separated(&self.after_columns))?; SpaceOrNewline.fmt(f)?; } + if let Some(output) = &self.output { + write!(f, "{output}")?; + SpaceOrNewline.fmt(f)?; + } + if let Some(settings) = &self.settings { write!(f, "SETTINGS {}", display_comma_separated(settings))?; SpaceOrNewline.fmt(f)?; } + for into_clause in &self.multi_table_into_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + + for when_clause in &self.multi_table_when_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", when_clause)?; + } + + if let Some(else_clauses) = &self.multi_table_else_clause { + SpaceOrNewline.fmt(f)?; + write!(f, "ELSE")?; + for into_clause in else_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + } + if let Some(source) = &self.source { + if !self.multi_table_into_clauses.is_empty() + || !self.multi_table_when_clauses.is_empty() + { + SpaceOrNewline.fmt(f)?; + } source.fmt(f)?; } else if !self.assignments.is_empty() { write!(f, "SET")?; @@ -173,6 +270,7 @@ impl Display for Insert { f.write_str("RETURNING")?; indented_list(f, returning)?; } + Ok(()) } } @@ -184,6 +282,11 @@ impl Display for Insert { pub struct Delete { /// Token for the `DELETE` keyword pub delete_token: AttachedToken, + /// Query optimizer hints + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hints: Vec, /// Multi tables delete are supported in 
mysql pub tables: Vec, /// FROM @@ -194,6 +297,9 @@ pub struct Delete { pub selection: Option, /// RETURNING pub returning: Option>, + /// OUTPUT (MSSQL) + /// See + pub output: Option, /// ORDER BY (MySQL) pub order_by: Vec, /// LIMIT (MySQL) @@ -203,6 +309,10 @@ pub struct Delete { impl Display for Delete { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str("DELETE")?; + for hint in &self.optimizer_hints { + f.write_str(" ")?; + hint.fmt(f)?; + } if !self.tables.is_empty() { indented_list(f, &self.tables)?; } @@ -215,6 +325,10 @@ impl Display for Delete { indented_list(f, from)?; } } + if let Some(output) = &self.output { + SpaceOrNewline.fmt(f)?; + write!(f, "{output}")?; + } if let Some(using) = &self.using { SpaceOrNewline.fmt(f)?; f.write_str("USING")?; @@ -253,6 +367,11 @@ impl Display for Delete { pub struct Update { /// Token for the `UPDATE` keyword pub update_token: AttachedToken, + /// Query optimizer hints + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hints: Vec, /// TABLE pub table: TableWithJoins, /// Column assignments @@ -263,6 +382,9 @@ pub struct Update { pub selection: Option, /// RETURNING pub returning: Option>, + /// OUTPUT (MSSQL) + /// See + pub output: Option, /// SQLite-specific conflict resolution clause pub or: Option, /// LIMIT @@ -271,7 +393,12 @@ pub struct Update { impl Display for Update { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str("UPDATE ")?; + f.write_str("UPDATE")?; + for hint in &self.optimizer_hints { + f.write_str(" ")?; + hint.fmt(f)?; + } + f.write_str(" ")?; if let Some(or) = &self.or { or.fmt(f)?; f.write_str(" ")?; @@ -287,6 +414,10 @@ impl Display for Update { f.write_str("SET")?; indented_list(f, &self.assignments)?; } + if let Some(output) = &self.output { + SpaceOrNewline.fmt(f)?; + 
write!(f, "{output}")?; + } if let Some(UpdateTableFromKind::AfterSet(from)) = &self.from { SpaceOrNewline.fmt(f)?; f.write_str("FROM")?; @@ -310,3 +441,465 @@ impl Display for Update { Ok(()) } } + +/// A `MERGE` statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Merge { + /// The `MERGE` token that starts the statement. + pub merge_token: AttachedToken, + /// Query optimizer hints + /// + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hints: Vec, + /// optional INTO keyword + pub into: bool, + /// Specifies the table to merge + pub table: TableFactor, + /// Specifies the table or subquery to join with the target table + pub source: TableFactor, + /// Specifies the expression on which to join the target table and source + pub on: Box, + /// Specifies the actions to perform when values match or do not match. 
+ pub clauses: Vec, + /// Specifies the output to save changes in MSSQL + pub output: Option, +} + +impl Display for Merge { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("MERGE")?; + for hint in &self.optimizer_hints { + write!(f, " {hint}")?; + } + if self.into { + write!(f, " INTO")?; + } + write!( + f, + " {table} USING {source} ", + table = self.table, + source = self.source + )?; + write!(f, "ON {on} ", on = self.on)?; + write!(f, "{}", display_separated(&self.clauses, " "))?; + if let Some(ref output) = self.output { + write!(f, " {output}")?; + } + Ok(()) + } +} + +/// A `WHEN` clause within a `MERGE` Statement +/// +/// Example: +/// ```sql +/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeClause { + /// The `WHEN` token that starts the sub-expression. + pub when_token: AttachedToken, + /// The type of `WHEN` clause. + pub clause_kind: MergeClauseKind, + /// An optional predicate to further restrict the clause. + pub predicate: Option, + /// The action to perform when the clause is matched. + pub action: MergeAction, +} + +impl Display for MergeClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let MergeClause { + when_token: _, + clause_kind, + predicate, + action, + } = self; + + write!(f, "WHEN {clause_kind}")?; + if let Some(pred) = predicate { + write!(f, " AND {pred}")?; + } + write!(f, " THEN {action}") + } +} + +/// Variant of `WHEN` clause used within a `MERGE` Statement. 
+/// +/// Example: +/// ```sql +/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeClauseKind { + /// `WHEN MATCHED` + Matched, + /// `WHEN NOT MATCHED` + NotMatched, + /// `WHEN NOT MATCHED BY TARGET` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedByTarget, + /// `WHEN NOT MATCHED BY SOURCE` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedBySource, +} + +impl Display for MergeClauseKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeClauseKind::Matched => write!(f, "MATCHED"), + MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), + MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), + MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), + } + } +} + +/// Underlying statement of a `WHEN` clause within a `MERGE` Statement +/// +/// Example +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeAction { + /// An `INSERT` clause + /// + /// Example: + ///
```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// ``` + Insert(MergeInsertExpr), + /// An `UPDATE` clause + /// + /// Example: + /// ```sql + /// UPDATE SET quantity = T.quantity + S.quantity + /// ``` + Update(MergeUpdateExpr), + /// A plain `DELETE` clause + Delete { + /// The `DELETE` token that starts the sub-expression. + delete_token: AttachedToken, + }, +} + +impl Display for MergeAction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeAction::Insert(insert) => { + write!(f, "INSERT {insert}") + } + MergeAction::Update(update) => { + write!(f, "UPDATE {update}") + } + MergeAction::Delete { .. } => { + write!(f, "DELETE") + } + } + } +} + +/// The type of expression used to insert rows within a `MERGE` statement. +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeInsertKind { + /// The insert expression is defined from an explicit `VALUES` clause + /// + /// Example: + /// ```sql + /// INSERT VALUES(product, quantity) + /// ``` + Values(Values), + /// The insert expression is defined using only the `ROW` keyword. + /// + /// Example: + /// ```sql + /// INSERT ROW + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + Row, +} + +impl Display for MergeInsertKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeInsertKind::Values(values) => { + write!(f, "{values}") + } + MergeInsertKind::Row => { + write!(f, "ROW") + } + } + } +} + +/// The expression used to insert rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// INSERT ROW +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeInsertExpr { + /// The `INSERT` token that starts the sub-expression. + pub insert_token: AttachedToken, + /// Columns (if any) specified by the insert. + /// + /// Example: + /// ```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// INSERT (product, quantity) ROW + /// ``` + pub columns: Vec, + /// The token, `[VALUES | ROW]` starting `kind`. + pub kind_token: AttachedToken, + /// The insert type used by the statement. + pub kind: MergeInsertKind, + /// An optional condition to restrict the insertion (Oracle specific) + pub insert_predicate: Option, +} + +impl Display for MergeInsertExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if !self.columns.is_empty() { + write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; + } + write!(f, "{}", self.kind)?; + if let Some(predicate) = self.insert_predicate.as_ref() { + write!(f, " WHERE {}", predicate)?; + } + Ok(()) + } +} + +/// The expression used to update rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// UPDATE SET quantity = T.quantity + S.quantity +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeUpdateExpr { + /// The `UPDATE` token that starts the sub-expression. + pub update_token: AttachedToken, + /// The update assignment expressions + pub assignments: Vec, + /// `where_clause` for the update (Oracle specific) + pub update_predicate: Option, + /// `delete_clause` for the update "delete where" (Oracle specific) + pub delete_predicate: Option, +} + +impl Display for MergeUpdateExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SET {}", display_comma_separated(&self.assignments))?; + if let Some(predicate) = self.update_predicate.as_ref() { + write!(f, " WHERE {predicate}")?; + } + if let Some(predicate) = self.delete_predicate.as_ref() { + write!(f, " DELETE WHERE {predicate}")?; + } + Ok(()) + } +} + +/// An `OUTPUT` clause on `MERGE`, `INSERT`, `UPDATE`, or `DELETE` (MSSQL). +/// +/// Example: +/// OUTPUT $action, deleted.* INTO dbo.temp_products; +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OutputClause { + /// `OUTPUT` clause + Output { + /// The `OUTPUT` token that starts the sub-expression.
+ output_token: AttachedToken, + /// The select items to output + select_items: Vec, + /// Optional `INTO` table to direct the output + into_table: Option, + }, + /// `RETURNING` clause + Returning { + /// The `RETURNING` token that starts the sub-expression. + returning_token: AttachedToken, + /// The select items to return + select_items: Vec, + }, +} + +impl fmt::Display for OutputClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OutputClause::Output { + output_token: _, + select_items, + into_table, + } => { + f.write_str("OUTPUT ")?; + display_comma_separated(select_items).fmt(f)?; + if let Some(into_table) = into_table { + f.write_str(" ")?; + into_table.fmt(f)?; + } + Ok(()) + } + OutputClause::Returning { + returning_token: _, + select_items, + } => { + f.write_str("RETURNING ")?; + display_comma_separated(select_items).fmt(f) + } + } + } +} + +/// A WHEN clause in a conditional multi-table INSERT. +/// +/// Syntax: +/// ```sql +/// WHEN n1 > 100 THEN +/// INTO t1 +/// INTO t2 (c1, c2) VALUES (n1, n2) +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertWhenClause { + /// The condition for this WHEN clause + pub condition: Expr, + /// The INTO clauses to execute when the condition is true + pub into_clauses: Vec, +} + +impl Display for MultiTableInsertWhenClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "WHEN {} THEN", self.condition)?; + for into_clause in &self.into_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + Ok(()) + } +} + +/// An INTO clause in a multi-table INSERT. +/// +/// Syntax: +/// ```sql +/// INTO <target_table> [ ( <target_col_name> [ , ...
] ) ] [ VALUES ( { <source_col_name> | DEFAULT | NULL } [ , ... ] ) ] +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertIntoClause { + /// The target table + pub table_name: ObjectName, + /// The target columns (optional) + pub columns: Vec, + /// The VALUES clause (optional) + pub values: Option, +} + +impl Display for MultiTableInsertIntoClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "INTO {}", self.table_name)?; + if !self.columns.is_empty() { + write!(f, " ({})", display_comma_separated(&self.columns))?; + } + if let Some(values) = &self.values { + write!(f, " VALUES ({})", display_comma_separated(&values.values))?; + } + Ok(()) + } +} + +/// The VALUES clause in a multi-table INSERT INTO clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertValues { + /// The values to insert (can be column references, DEFAULT, or NULL) + pub values: Vec, +} + +/// A value in a multi-table INSERT VALUES clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MultiTableInsertValue { + /// A column reference or expression from the source + Expr(Expr), + /// The DEFAULT keyword + Default, +} + +impl Display for MultiTableInsertValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MultiTableInsertValue::Expr(expr) => write!(f, "{}", expr), + MultiTableInsertValue::Default => write!(f, "DEFAULT"), + } + } +} + +/// The type of multi-table INSERT statement (Snowflake).
+/// +/// See: <https://docs.snowflake.com/en/sql-reference/sql/insert-multi-table> +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MultiTableInsertType { + /// `INSERT ALL` - all matching WHEN clauses are executed + All, + /// `INSERT FIRST` - only the first matching WHEN clause is executed + First, +} + +impl Display for MultiTableInsertType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MultiTableInsertType::All => write!(f, "ALL"), + MultiTableInsertType::First => write!(f, "FIRST"), + } + } +} diff --git a/src/ast/helpers/key_value_options.rs b/src/ast/helpers/key_value_options.rs index 745c3a65af..2aa59d9d7c 100644 --- a/src/ast/helpers/key_value_options.rs +++ b/src/ast/helpers/key_value_options.rs @@ -29,29 +29,38 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::{display_comma_separated, display_separated, Value}; +use crate::ast::{display_comma_separated, display_separated, ValueWithSpan}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A collection of key-value options. pub struct KeyValueOptions { + /// The list of key-value options. pub options: Vec, + /// The delimiter used between options. pub delimiter: KeyValueOptionsDelimiter, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The delimiter used between key-value options. pub enum KeyValueOptionsDelimiter { + /// Options are separated by spaces. Space, + /// Options are separated by commas.
Comma, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single key-value option. pub struct KeyValueOption { + /// The name of the option. pub option_name: String, + /// The value of the option. pub option_value: KeyValueOptionKind, } @@ -63,9 +72,13 @@ pub struct KeyValueOption { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of value for a key-value option. pub enum KeyValueOptionKind { - Single(Value), - Multi(Vec), + /// A single value. + Single(ValueWithSpan), + /// Multiple values. + Multi(Vec), + /// A nested list of key-value options. KeyValueOptions(Box), } diff --git a/src/ast/helpers/mod.rs b/src/ast/helpers/mod.rs index 3efbcf7b05..344895dc15 100644 --- a/src/ast/helpers/mod.rs +++ b/src/ast/helpers/mod.rs @@ -14,8 +14,14 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + +/// Helper utilities for attached tokens used by AST helpers. pub mod attached_token; +/// Utilities for parsing key/value style options in helper statements. pub mod key_value_options; +/// Helpers for `CREATE DATABASE` statement construction/parsing. pub mod stmt_create_database; +/// Helpers for `CREATE TABLE` statement construction/parsing. pub mod stmt_create_table; +/// Helpers for data loading/unloading related statements (stages, PUT, COPY INTO). 
pub mod stmt_data_loading; diff --git a/src/ast/helpers/stmt_create_database.rs b/src/ast/helpers/stmt_create_database.rs index 58a7b0906b..e524228dee 100644 --- a/src/ast/helpers/stmt_create_database.rs +++ b/src/ast/helpers/stmt_create_database.rs @@ -55,29 +55,62 @@ use crate::parser::ParserError; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateDatabaseBuilder { + /// The database name to create. pub db_name: ObjectName, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// Optional storage location for the database. pub location: Option, + /// Optional managed storage location. pub managed_location: Option, + /// Whether `OR REPLACE` was specified. pub or_replace: bool, + /// Whether the database is `TRANSIENT`. pub transient: bool, + /// Optional `CLONE` source object name. pub clone: Option, + /// Optional data retention time in days. pub data_retention_time_in_days: Option, + /// Optional max data extension time in days. pub max_data_extension_time_in_days: Option, + /// Optional external volume identifier. pub external_volume: Option, + /// Optional catalog name. pub catalog: Option, + /// Whether to replace invalid characters. pub replace_invalid_characters: Option, + /// Optional default DDL collation. pub default_ddl_collation: Option, + /// Optional storage serialization policy. pub storage_serialization_policy: Option, + /// Optional comment attached to the database. pub comment: Option, + /// Optional default character set (MySQL). + /// + /// + pub default_charset: Option, + /// Optional default collation (MySQL). + /// + /// + pub default_collation: Option, + /// Optional catalog sync configuration. pub catalog_sync: Option, + /// Optional catalog sync namespace mode. pub catalog_sync_namespace_mode: Option, + /// Optional namespace flatten delimiter for catalog sync. 
pub catalog_sync_namespace_flatten_delimiter: Option, + /// Optional tags attached to the database. pub with_tags: Option>, + /// Optional contact entries associated with the database. pub with_contacts: Option>, } impl CreateDatabaseBuilder { + /// Create a new `CreateDatabaseBuilder` with the given database name. + /// + /// # Arguments + /// + /// * `name` - The name of the database to be created. pub fn new(name: ObjectName) -> Self { Self { db_name: name, @@ -95,6 +128,8 @@ impl CreateDatabaseBuilder { default_ddl_collation: None, storage_serialization_policy: None, comment: None, + default_charset: None, + default_collation: None, catalog_sync: None, catalog_sync_namespace_mode: None, catalog_sync_namespace_flatten_delimiter: None, @@ -103,41 +138,49 @@ impl CreateDatabaseBuilder { } } + /// Set the location for the database. pub fn location(mut self, location: Option) -> Self { self.location = location; self } + /// Set the managed location for the database. pub fn managed_location(mut self, managed_location: Option) -> Self { self.managed_location = managed_location; self } + /// Set whether this is an `OR REPLACE` operation. pub fn or_replace(mut self, or_replace: bool) -> Self { self.or_replace = or_replace; self } + /// Set whether this is a transient database. pub fn transient(mut self, transient: bool) -> Self { self.transient = transient; self } + /// Set whether to use `IF NOT EXISTS`. pub fn if_not_exists(mut self, if_not_exists: bool) -> Self { self.if_not_exists = if_not_exists; self } + /// Set the clone clause for the database. pub fn clone_clause(mut self, clone: Option) -> Self { self.clone = clone; self } + /// Set the data retention time in days. pub fn data_retention_time_in_days(mut self, data_retention_time_in_days: Option) -> Self { self.data_retention_time_in_days = data_retention_time_in_days; self } + /// Set the maximum data extension time in days. 
pub fn max_data_extension_time_in_days( mut self, max_data_extension_time_in_days: Option, @@ -146,26 +189,31 @@ impl CreateDatabaseBuilder { self } + /// Set the external volume for the database. pub fn external_volume(mut self, external_volume: Option) -> Self { self.external_volume = external_volume; self } + /// Set the catalog for the database. pub fn catalog(mut self, catalog: Option) -> Self { self.catalog = catalog; self } + /// Set whether to replace invalid characters. pub fn replace_invalid_characters(mut self, replace_invalid_characters: Option) -> Self { self.replace_invalid_characters = replace_invalid_characters; self } + /// Set the default DDL collation. pub fn default_ddl_collation(mut self, default_ddl_collation: Option) -> Self { self.default_ddl_collation = default_ddl_collation; self } + /// Set the storage serialization policy. pub fn storage_serialization_policy( mut self, storage_serialization_policy: Option, @@ -174,16 +222,31 @@ impl CreateDatabaseBuilder { self } + /// Set the comment for the database. pub fn comment(mut self, comment: Option) -> Self { self.comment = comment; self } + /// Set the default character set for the database. + pub fn default_charset(mut self, default_charset: Option) -> Self { + self.default_charset = default_charset; + self + } + + /// Set the default collation for the database. + pub fn default_collation(mut self, default_collation: Option) -> Self { + self.default_collation = default_collation; + self + } + + /// Set the catalog sync for the database. pub fn catalog_sync(mut self, catalog_sync: Option) -> Self { self.catalog_sync = catalog_sync; self } + /// Set the catalog sync namespace mode for the database. pub fn catalog_sync_namespace_mode( mut self, catalog_sync_namespace_mode: Option, @@ -192,6 +255,7 @@ impl CreateDatabaseBuilder { self } + /// Set the catalog sync namespace flatten delimiter for the database. 
pub fn catalog_sync_namespace_flatten_delimiter( mut self, catalog_sync_namespace_flatten_delimiter: Option, @@ -200,16 +264,19 @@ impl CreateDatabaseBuilder { self } + /// Set the tags for the database. pub fn with_tags(mut self, with_tags: Option>) -> Self { self.with_tags = with_tags; self } + /// Set the contacts for the database. pub fn with_contacts(mut self, with_contacts: Option>) -> Self { self.with_contacts = with_contacts; self } + /// Build the `CREATE DATABASE` statement. pub fn build(self) -> Statement { Statement::CreateDatabase { db_name: self.db_name, @@ -227,6 +294,8 @@ impl CreateDatabaseBuilder { default_ddl_collation: self.default_ddl_collation, storage_serialization_policy: self.storage_serialization_policy, comment: self.comment, + default_charset: self.default_charset, + default_collation: self.default_collation, catalog_sync: self.catalog_sync, catalog_sync_namespace_mode: self.catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter: self.catalog_sync_namespace_flatten_delimiter, @@ -257,6 +326,8 @@ impl TryFrom for CreateDatabaseBuilder { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, @@ -278,6 +349,8 @@ impl TryFrom for CreateDatabaseBuilder { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index fe950c909c..ab2feb6930 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -25,10 +25,11 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ - ClusteredBy, ColumnDef, CommentDef, CreateTable, CreateTableLikeKind, CreateTableOptions, Expr, - FileFormat, HiveDistributionStyle, 
HiveFormat, Ident, InitializeKind, ObjectName, OnCommit, - OneOrManyWithParens, Query, RefreshModeKind, RowAccessPolicy, Statement, - StorageSerializationPolicy, TableConstraint, TableVersion, Tag, WrappedCollection, + ClusteredBy, ColumnDef, CommentDef, CreateTable, CreateTableLikeKind, CreateTableOptions, + DistStyle, Expr, FileFormat, ForValues, HiveDistributionStyle, HiveFormat, Ident, + InitializeKind, ObjectName, OnCommit, OneOrManyWithParens, Query, RefreshModeKind, + RowAccessPolicy, Statement, StorageLifecyclePolicy, StorageSerializationPolicy, + TableConstraint, TableVersion, Tag, WrappedCollection, }; use crate::parser::ParserError; @@ -64,60 +65,128 @@ use crate::parser::ParserError; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateTableBuilder { + /// Whether the statement uses `OR REPLACE`. pub or_replace: bool, + /// Whether the table is `TEMPORARY`. pub temporary: bool, + /// Whether the table is `EXTERNAL`. pub external: bool, + /// Optional `GLOBAL` flag for dialects that support it. pub global: Option, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// Whether `TRANSIENT` was specified. pub transient: bool, + /// Whether `VOLATILE` was specified. pub volatile: bool, + /// Iceberg-specific table flag. pub iceberg: bool, + /// `SNAPSHOT` table flag. + pub snapshot: bool, + /// Whether `DYNAMIC` table option is set. pub dynamic: bool, + /// The table name. pub name: ObjectName, + /// Column definitions for the table. pub columns: Vec, + /// Table-level constraints. pub constraints: Vec, + /// Hive distribution style. pub hive_distribution: HiveDistributionStyle, + /// Optional Hive format settings. pub hive_formats: Option, + /// Optional file format for storage. pub file_format: Option, + /// Optional storage location. pub location: Option, + /// Optional `AS SELECT` query for the table. 
pub query: Option>, + /// Whether `WITHOUT ROWID` is set. pub without_rowid: bool, + /// Optional `LIKE` clause kind. pub like: Option, + /// Optional `CLONE` source object name. pub clone: Option, + /// Optional table version. pub version: Option, + /// Optional table comment. pub comment: Option, + /// Optional `ON COMMIT` behavior. pub on_commit: Option, + /// Optional cluster identifier. pub on_cluster: Option, + /// Optional primary key expression. pub primary_key: Option>, + /// Optional `ORDER BY` for clustering/sorting. pub order_by: Option>, + /// Optional `PARTITION BY` expression. pub partition_by: Option>, + /// Optional `CLUSTER BY` expressions. pub cluster_by: Option>>, + /// Optional `CLUSTERED BY` clause. pub clustered_by: Option, + /// Optional parent tables (`INHERITS`). pub inherits: Option>, + /// Optional partitioned table (`PARTITION OF`) + pub partition_of: Option, + /// Range of values associated with the partition (`FOR VALUES`) + pub for_values: Option, + /// `STRICT` table flag. pub strict: bool, + /// Whether to copy grants from the source. pub copy_grants: bool, + /// Optional flag for schema evolution support. pub enable_schema_evolution: Option, + /// Optional change tracking flag. pub change_tracking: Option, + /// Optional data retention time in days. pub data_retention_time_in_days: Option, + /// Optional max data extension time in days. pub max_data_extension_time_in_days: Option, + /// Optional default DDL collation. pub default_ddl_collation: Option, + /// Optional aggregation policy object name. pub with_aggregation_policy: Option, + /// Optional row access policy applied to the table. pub with_row_access_policy: Option, + /// Optional storage lifecycle policy applied to the table. + pub with_storage_lifecycle_policy: Option, + /// Optional tags/labels attached to the table metadata. pub with_tags: Option>, + /// Optional base location for staged data. pub base_location: Option, + /// Optional external volume identifier. 
pub external_volume: Option, + /// Optional catalog name. pub catalog: Option, + /// Optional catalog synchronization option. pub catalog_sync: Option, + /// Optional storage serialization policy. pub storage_serialization_policy: Option, + /// Parsed table options from the statement. pub table_options: CreateTableOptions, + /// Optional target lag configuration. pub target_lag: Option, + /// Optional warehouse identifier. pub warehouse: Option, + /// Optional refresh mode for materialized tables. pub refresh_mode: Option, + /// Optional initialization kind for the table. pub initialize: Option, + /// Whether operations require a user identity. pub require_user: bool, + /// Redshift `DISTSTYLE` option. + pub diststyle: Option, + /// Redshift `DISTKEY` option. + pub distkey: Option, + /// Redshift `SORTKEY` option. + pub sortkey: Option>, + /// Redshift `BACKUP` option. + pub backup: Option, } impl CreateTableBuilder { + /// Create a new `CreateTableBuilder` for the given table name. pub fn new(name: ObjectName) -> Self { Self { or_replace: false, @@ -128,6 +197,7 @@ impl CreateTableBuilder { transient: false, volatile: false, iceberg: false, + snapshot: false, dynamic: false, name, columns: vec![], @@ -150,6 +220,8 @@ impl CreateTableBuilder { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -159,6 +231,7 @@ impl CreateTableBuilder { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: None, base_location: None, external_volume: None, @@ -171,177 +244,202 @@ impl CreateTableBuilder { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, + sortkey: None, + backup: None, } } + /// Set `OR REPLACE` for the CREATE TABLE statement. 
pub fn or_replace(mut self, or_replace: bool) -> Self { self.or_replace = or_replace; self } - + /// Mark the table as `TEMPORARY`. pub fn temporary(mut self, temporary: bool) -> Self { self.temporary = temporary; self } - + /// Mark the table as `EXTERNAL`. pub fn external(mut self, external: bool) -> Self { self.external = external; self } - + /// Set optional `GLOBAL` flag (dialect-specific). pub fn global(mut self, global: Option) -> Self { self.global = global; self } - + /// Set `IF NOT EXISTS`. pub fn if_not_exists(mut self, if_not_exists: bool) -> Self { self.if_not_exists = if_not_exists; self } - + /// Set `TRANSIENT` flag. pub fn transient(mut self, transient: bool) -> Self { self.transient = transient; self } - + /// Set `VOLATILE` flag. pub fn volatile(mut self, volatile: bool) -> Self { self.volatile = volatile; self } - + /// Enable Iceberg table semantics. pub fn iceberg(mut self, iceberg: bool) -> Self { self.iceberg = iceberg; self } - + /// Set `SNAPSHOT` table flag (BigQuery). + pub fn snapshot(mut self, snapshot: bool) -> Self { + self.snapshot = snapshot; + self + } + /// Set `DYNAMIC` table option. pub fn dynamic(mut self, dynamic: bool) -> Self { self.dynamic = dynamic; self } - + /// Set the table column definitions. pub fn columns(mut self, columns: Vec) -> Self { self.columns = columns; self } - + /// Set table-level constraints. pub fn constraints(mut self, constraints: Vec) -> Self { self.constraints = constraints; self } - + /// Set Hive distribution style. pub fn hive_distribution(mut self, hive_distribution: HiveDistributionStyle) -> Self { self.hive_distribution = hive_distribution; self } - + /// Set Hive-specific formats. pub fn hive_formats(mut self, hive_formats: Option) -> Self { self.hive_formats = hive_formats; self } - + /// Set file format for the table (e.g., PARQUET). pub fn file_format(mut self, file_format: Option) -> Self { self.file_format = file_format; self } + /// Set storage `location` for the table. 
pub fn location(mut self, location: Option) -> Self { self.location = location; self } - + /// Set an underlying `AS SELECT` query for the table. pub fn query(mut self, query: Option>) -> Self { self.query = query; self } + /// Set `WITHOUT ROWID` option. pub fn without_rowid(mut self, without_rowid: bool) -> Self { self.without_rowid = without_rowid; self } - + /// Set `LIKE` clause for the table. pub fn like(mut self, like: Option) -> Self { self.like = like; self } - // Different name to allow the object to be cloned + /// Set `CLONE` source object name. pub fn clone_clause(mut self, clone: Option) -> Self { self.clone = clone; self } - + /// Set table `VERSION`. pub fn version(mut self, version: Option) -> Self { self.version = version; self } - + /// Set a comment for the table or following column definitions. pub fn comment_after_column_def(mut self, comment: Option) -> Self { self.comment = comment; self } - + /// Set `ON COMMIT` behavior for temporary tables. pub fn on_commit(mut self, on_commit: Option) -> Self { self.on_commit = on_commit; self } - + /// Set cluster identifier for the table. pub fn on_cluster(mut self, on_cluster: Option) -> Self { self.on_cluster = on_cluster; self } - + /// Set a primary key expression for the table. pub fn primary_key(mut self, primary_key: Option>) -> Self { self.primary_key = primary_key; self } - + /// Set `ORDER BY` clause for clustered/sorted tables. pub fn order_by(mut self, order_by: Option>) -> Self { self.order_by = order_by; self } - + /// Set `PARTITION BY` expression. pub fn partition_by(mut self, partition_by: Option>) -> Self { self.partition_by = partition_by; self } - + /// Set `CLUSTER BY` expression(s). pub fn cluster_by(mut self, cluster_by: Option>>) -> Self { self.cluster_by = cluster_by; self } - + /// Set `CLUSTERED BY` clause. pub fn clustered_by(mut self, clustered_by: Option) -> Self { self.clustered_by = clustered_by; self } - + /// Set parent tables via `INHERITS`. 
pub fn inherits(mut self, inherits: Option>) -> Self { self.inherits = inherits; self } + /// Sets the table which is partitioned to create the current table. + pub fn partition_of(mut self, partition_of: Option) -> Self { + self.partition_of = partition_of; + self + } + + /// Sets the range of values associated with the partition. + pub fn for_values(mut self, for_values: Option) -> Self { + self.for_values = for_values; + self + } + + /// Set `STRICT` option. pub fn strict(mut self, strict: bool) -> Self { self.strict = strict; self } - + /// Enable copying grants from source object. pub fn copy_grants(mut self, copy_grants: bool) -> Self { self.copy_grants = copy_grants; self } - + /// Enable or disable schema evolution features. pub fn enable_schema_evolution(mut self, enable_schema_evolution: Option) -> Self { self.enable_schema_evolution = enable_schema_evolution; self } - + /// Enable or disable change tracking. pub fn change_tracking(mut self, change_tracking: Option) -> Self { self.change_tracking = change_tracking; self } - + /// Set data retention time (in days). pub fn data_retention_time_in_days(mut self, data_retention_time_in_days: Option) -> Self { self.data_retention_time_in_days = data_retention_time_in_days; self } - + /// Set maximum data extension time (in days). pub fn max_data_extension_time_in_days( mut self, max_data_extension_time_in_days: Option, @@ -349,17 +447,17 @@ impl CreateTableBuilder { self.max_data_extension_time_in_days = max_data_extension_time_in_days; self } - + /// Set default DDL collation. pub fn default_ddl_collation(mut self, default_ddl_collation: Option) -> Self { self.default_ddl_collation = default_ddl_collation; self } - + /// Set aggregation policy object. pub fn with_aggregation_policy(mut self, with_aggregation_policy: Option) -> Self { self.with_aggregation_policy = with_aggregation_policy; self } - + /// Attach a row access policy to the table. 
pub fn with_row_access_policy( mut self, with_row_access_policy: Option, @@ -367,32 +465,40 @@ impl CreateTableBuilder { self.with_row_access_policy = with_row_access_policy; self } - + /// Attach a storage lifecycle policy to the table. + pub fn with_storage_lifecycle_policy( + mut self, + with_storage_lifecycle_policy: Option, + ) -> Self { + self.with_storage_lifecycle_policy = with_storage_lifecycle_policy; + self + } + /// Attach tags/labels to the table metadata. pub fn with_tags(mut self, with_tags: Option>) -> Self { self.with_tags = with_tags; self } - + /// Set a base storage location for staged data. pub fn base_location(mut self, base_location: Option) -> Self { self.base_location = base_location; self } - + /// Set an external volume identifier. pub fn external_volume(mut self, external_volume: Option) -> Self { self.external_volume = external_volume; self } - + /// Set the catalog name for the table. pub fn catalog(mut self, catalog: Option) -> Self { self.catalog = catalog; self } - + /// Set catalog synchronization option. pub fn catalog_sync(mut self, catalog_sync: Option) -> Self { self.catalog_sync = catalog_sync; self } - + /// Set a storage serialization policy. pub fn storage_serialization_policy( mut self, storage_serialization_policy: Option, @@ -400,38 +506,58 @@ impl CreateTableBuilder { self.storage_serialization_policy = storage_serialization_policy; self } - + /// Set arbitrary table options parsed from the statement. pub fn table_options(mut self, table_options: CreateTableOptions) -> Self { self.table_options = table_options; self } - + /// Set a target lag configuration (dialect-specific). pub fn target_lag(mut self, target_lag: Option) -> Self { self.target_lag = target_lag; self } - + /// Associate the table with a warehouse identifier. pub fn warehouse(mut self, warehouse: Option) -> Self { self.warehouse = warehouse; self } - + /// Set refresh mode for materialized/managed tables. 
pub fn refresh_mode(mut self, refresh_mode: Option) -> Self { self.refresh_mode = refresh_mode; self } - + /// Set initialization mode for the table. pub fn initialize(mut self, initialize: Option) -> Self { self.initialize = initialize; self } - + /// Require a user identity for table operations. pub fn require_user(mut self, require_user: bool) -> Self { self.require_user = require_user; self } - - pub fn build(self) -> Statement { + /// Set Redshift `DISTSTYLE` option. + pub fn diststyle(mut self, diststyle: Option) -> Self { + self.diststyle = diststyle; + self + } + /// Set Redshift `DISTKEY` option. + pub fn distkey(mut self, distkey: Option) -> Self { + self.distkey = distkey; + self + } + /// Set Redshift `SORTKEY` option. + pub fn sortkey(mut self, sortkey: Option>) -> Self { + self.sortkey = sortkey; + self + } + /// Set the Redshift `BACKUP` option. + pub fn backup(mut self, backup: Option) -> Self { + self.backup = backup; + self + } + /// Consume the builder and produce a `CreateTable`. 
+ pub fn build(self) -> CreateTable { CreateTable { or_replace: self.or_replace, temporary: self.temporary, @@ -441,6 +567,7 @@ impl CreateTableBuilder { transient: self.transient, volatile: self.volatile, iceberg: self.iceberg, + snapshot: self.snapshot, dynamic: self.dynamic, name: self.name, columns: self.columns, @@ -463,6 +590,8 @@ impl CreateTableBuilder { cluster_by: self.cluster_by, clustered_by: self.clustered_by, inherits: self.inherits, + partition_of: self.partition_of, + for_values: self.for_values, strict: self.strict, copy_grants: self.copy_grants, enable_schema_evolution: self.enable_schema_evolution, @@ -472,6 +601,7 @@ impl CreateTableBuilder { default_ddl_collation: self.default_ddl_collation, with_aggregation_policy: self.with_aggregation_policy, with_row_access_policy: self.with_row_access_policy, + with_storage_lifecycle_policy: self.with_storage_lifecycle_policy, with_tags: self.with_tags, base_location: self.base_location, external_volume: self.external_volume, @@ -484,8 +614,11 @@ impl CreateTableBuilder { refresh_mode: self.refresh_mode, initialize: self.initialize, require_user: self.require_user, + diststyle: self.diststyle, + distkey: self.distkey, + sortkey: self.sortkey, + backup: self.backup, } - .into() } } @@ -496,111 +629,7 @@ impl TryFrom for CreateTableBuilder { // ownership. 
fn try_from(stmt: Statement) -> Result { match stmt { - Statement::CreateTable(CreateTable { - or_replace, - temporary, - external, - global, - if_not_exists, - transient, - volatile, - iceberg, - dynamic, - name, - columns, - constraints, - hive_distribution, - hive_formats, - file_format, - location, - query, - without_rowid, - like, - clone, - version, - comment, - on_commit, - on_cluster, - primary_key, - order_by, - partition_by, - cluster_by, - clustered_by, - inherits, - strict, - copy_grants, - enable_schema_evolution, - change_tracking, - data_retention_time_in_days, - max_data_extension_time_in_days, - default_ddl_collation, - with_aggregation_policy, - with_row_access_policy, - with_tags, - base_location, - external_volume, - catalog, - catalog_sync, - storage_serialization_policy, - table_options, - target_lag, - warehouse, - refresh_mode, - initialize, - require_user, - }) => Ok(Self { - or_replace, - temporary, - external, - global, - if_not_exists, - transient, - dynamic, - name, - columns, - constraints, - hive_distribution, - hive_formats, - file_format, - location, - query, - without_rowid, - like, - clone, - version, - comment, - on_commit, - on_cluster, - primary_key, - order_by, - partition_by, - cluster_by, - clustered_by, - inherits, - strict, - iceberg, - copy_grants, - enable_schema_evolution, - change_tracking, - data_retention_time_in_days, - max_data_extension_time_in_days, - default_ddl_collation, - with_aggregation_policy, - with_row_access_policy, - with_tags, - volatile, - base_location, - external_volume, - catalog, - catalog_sync, - storage_serialization_policy, - table_options, - target_lag, - warehouse, - refresh_mode, - initialize, - require_user, - }), + Statement::CreateTable(create_table) => Ok(create_table.into()), _ => Err(ParserError::ParserError(format!( "Expected create table statement, but received: {stmt}" ))), @@ -608,6 +637,72 @@ impl TryFrom for CreateTableBuilder { } } +impl From for CreateTableBuilder { + fn 
from(table: CreateTable) -> Self { + Self { + or_replace: table.or_replace, + temporary: table.temporary, + external: table.external, + global: table.global, + if_not_exists: table.if_not_exists, + transient: table.transient, + volatile: table.volatile, + iceberg: table.iceberg, + snapshot: table.snapshot, + dynamic: table.dynamic, + name: table.name, + columns: table.columns, + constraints: table.constraints, + hive_distribution: table.hive_distribution, + hive_formats: table.hive_formats, + file_format: table.file_format, + location: table.location, + query: table.query, + without_rowid: table.without_rowid, + like: table.like, + clone: table.clone, + version: table.version, + comment: table.comment, + on_commit: table.on_commit, + on_cluster: table.on_cluster, + primary_key: table.primary_key, + order_by: table.order_by, + partition_by: table.partition_by, + cluster_by: table.cluster_by, + clustered_by: table.clustered_by, + inherits: table.inherits, + partition_of: table.partition_of, + for_values: table.for_values, + strict: table.strict, + copy_grants: table.copy_grants, + enable_schema_evolution: table.enable_schema_evolution, + change_tracking: table.change_tracking, + data_retention_time_in_days: table.data_retention_time_in_days, + max_data_extension_time_in_days: table.max_data_extension_time_in_days, + default_ddl_collation: table.default_ddl_collation, + with_aggregation_policy: table.with_aggregation_policy, + with_row_access_policy: table.with_row_access_policy, + with_storage_lifecycle_policy: table.with_storage_lifecycle_policy, + with_tags: table.with_tags, + base_location: table.base_location, + external_volume: table.external_volume, + catalog: table.catalog, + catalog_sync: table.catalog_sync, + storage_serialization_policy: table.storage_serialization_policy, + table_options: table.table_options, + target_lag: table.target_lag, + warehouse: table.warehouse, + refresh_mode: table.refresh_mode, + initialize: table.initialize, + require_user: 
table.require_user, + diststyle: table.diststyle, + distkey: table.distkey, + sortkey: table.sortkey, + backup: table.backup, + } + } +} + /// Helper return type when parsing configuration for a `CREATE TABLE` statement. #[derive(Default)] pub(crate) struct CreateTableConfiguration { @@ -627,7 +722,8 @@ mod tests { pub fn test_from_valid_statement() { let builder = CreateTableBuilder::new(ObjectName::from(vec![Ident::new("table_name")])); - let stmt = builder.clone().build(); + let create_table = builder.clone().build(); + let stmt: Statement = create_table.into(); assert_eq!(builder, CreateTableBuilder::try_from(stmt).unwrap()); } diff --git a/src/ast/helpers/stmt_data_loading.rs b/src/ast/helpers/stmt_data_loading.rs index 92a7272799..dfc1f4b0bf 100644 --- a/src/ast/helpers/stmt_data_loading.rs +++ b/src/ast/helpers/stmt_data_loading.rs @@ -34,11 +34,17 @@ use sqlparser_derive::{Visit, VisitMut}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Parameters for a named stage object used in data loading/unloading. pub struct StageParamsObject { + /// Optional URL for the stage. pub url: Option, + /// Encryption-related key/value options. pub encryption: KeyValueOptions, + /// Optional endpoint string. pub endpoint: Option, + /// Optional storage integration identifier. pub storage_integration: Option, + /// Credentials for accessing the stage. pub credentials: KeyValueOptions, } @@ -48,7 +54,9 @@ pub struct StageParamsObject { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum StageLoadSelectItemKind { + /// A standard SQL select item expression. SelectItem(SelectItem), + /// A Snowflake-specific select item used for stage loading. 
StageLoadSelectItem(StageLoadSelectItem), } @@ -64,10 +72,15 @@ impl fmt::Display for StageLoadSelectItemKind { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single item in the `SELECT` list for data loading from staged files. pub struct StageLoadSelectItem { + /// Optional alias for the input source. pub alias: Option, + /// Column number within the staged file (1-based). pub file_col_num: i32, + /// Optional element identifier following the column reference. pub element: Option, + /// Optional alias for the item (AS clause). pub item_as: Option, } @@ -99,15 +112,15 @@ impl fmt::Display for StageParamsObject { impl fmt::Display for StageLoadSelectItem { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.alias.is_some() { - write!(f, "{}.", self.alias.as_ref().unwrap())?; + if let Some(alias) = &self.alias { + write!(f, "{alias}.")?; } write!(f, "${}", self.file_col_num)?; - if self.element.is_some() { - write!(f, ":{}", self.element.as_ref().unwrap())?; + if let Some(element) = &self.element { + write!(f, ":{element}")?; } - if self.item_as.is_some() { - write!(f, " AS {}", self.item_as.as_ref().unwrap())?; + if let Some(item_as) = &self.item_as { + write!(f, " AS {item_as}")?; } Ok(()) } @@ -116,9 +129,12 @@ impl fmt::Display for StageLoadSelectItem { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A command to stage files to a named stage. pub struct FileStagingCommand { + /// The stage to which files are being staged. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub stage: ObjectName, + /// Optional file matching `PATTERN` expression. 
pub pattern: Option, } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 8021cdf254..8ca056ff4a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -56,31 +56,41 @@ pub use self::data_type::{ ExactNumberInfo, IntervalFields, StructBracketKind, TimezoneInfo, }; pub use self::dcl::{ - AlterRoleOperation, CreateRole, ResetConfig, RoleOption, SecondaryRoles, SetConfigValue, Use, + AlterRoleOperation, CreateRole, Grant, ResetConfig, Revoke, RoleOption, SecondaryRoles, + SetConfigValue, Use, }; pub use self::ddl::{ - Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, - AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, - AlterTableLock, AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue, - AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, - ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, - ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, - CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass, - CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, - DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily, - DropOperatorSignature, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters, + Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator, + AlterOperatorClass, AlterOperatorClassOperation, AlterOperatorFamily, + AlterOperatorFamilyOperation, AlterOperatorOperation, AlterPolicy, AlterPolicyOperation, + AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, AlterTableLock, + AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue, AlterTypeAddValuePosition, + AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, ClusteredBy, ColumnDef, + ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, 
ColumnPolicyProperty, + ConstraintCharacteristics, CreateConnector, CreateDomain, CreateExtension, CreateFunction, + CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreatePolicy, + CreatePolicyCommand, CreatePolicyType, CreateTable, CreateTrigger, CreateView, Deduplicate, + DeferrableInitial, DistStyle, DropBehavior, DropExtension, DropFunction, DropOperator, + DropOperatorClass, DropOperatorFamily, DropOperatorSignature, DropPolicy, DropTrigger, + ForValues, FunctionReturnType, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, - OperatorArgTypes, OperatorClassItem, OperatorPurpose, Owner, Partition, ProcedureParam, + OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, + OperatorOption, OperatorPurpose, Owner, Partition, PartitionBoundValue, ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; -pub use self::dml::{Delete, Insert, Update}; +pub use self::dml::{ + Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, MultiTableInsertIntoClause, MultiTableInsertType, + MultiTableInsertValue, MultiTableInsertValues, MultiTableInsertWhenClause, OutputClause, + Update, +}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, + AfterMatchSkip, ConnectByKind, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, ExprWithAliasAndOrderBy, Fetch, ForClause, 
ForJson, ForXml, FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, @@ -90,14 +100,15 @@ pub use self::query::{ OffsetRows, OpenJsonTableColumn, OrderBy, OrderByExpr, OrderByKind, OrderByOptions, PipeOperator, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, - SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, SetOperator, - SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, - TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, - TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod, - TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, - TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, - ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition, - XmlPassingArgument, XmlPassingClause, XmlTableColumn, XmlTableColumnOption, + SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SelectModifiers, + SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, + TableAliasColumnDef, TableFactor, TableFunctionArgs, TableIndexHintForClause, + TableIndexHintType, TableIndexHints, TableIndexType, TableSample, TableSampleBucket, + TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed, + TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, + UpdateTableFromKind, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, + XmlNamespaceDefinition, XmlPassingArgument, XmlPassingClause, XmlTableColumn, + XmlTableColumnOption, }; pub use self::trigger::{ @@ -107,7 +118,7 @@ pub use self::trigger::{ pub use self::value::{ 
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, - NormalizationForm, TrimWhereField, Value, ValueWithSpan, + NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan, }; use crate::ast::helpers::key_value_options::KeyValueOptions; @@ -122,23 +133,26 @@ mod data_type; mod dcl; mod ddl; mod dml; +/// Helper modules for building and manipulating AST nodes. pub mod helpers; pub mod table_constraints; pub use table_constraints::{ - CheckConstraint, ForeignKeyConstraint, FullTextOrSpatialConstraint, IndexConstraint, - PrimaryKeyConstraint, TableConstraint, UniqueConstraint, + CheckConstraint, ConstraintUsingIndex, ForeignKeyConstraint, FullTextOrSpatialConstraint, + IndexConstraint, PrimaryKeyConstraint, TableConstraint, UniqueConstraint, }; mod operator; mod query; mod spans; pub use spans::Spanned; +pub mod comments; mod trigger; mod value; #[cfg(feature = "visitor")] mod visitor; +/// Helper used to format a slice using a separator string (e.g., `", "`). pub struct DisplaySeparated<'a, T> where T: fmt::Display, @@ -284,6 +298,7 @@ impl Ident { } } + /// Create an `Ident` with the given `span` and `value` (unquoted). pub fn with_span(span: Span, value: S) -> Self where S: Into, @@ -295,6 +310,7 @@ impl Ident { } } + /// Create a quoted `Ident` with the given `quote` and `span`. pub fn with_quote_and_span(quote: char, span: Span, value: S) -> Self where S: Into, @@ -344,6 +360,12 @@ impl From> for ObjectName { } } +impl From for ObjectName { + fn from(ident: Ident) -> Self { + ObjectName(vec![ObjectNamePart::Identifier(ident)]) + } +} + impl fmt::Display for ObjectName { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", display_separated(&self.0, ".")) @@ -355,11 +377,14 @@ impl fmt::Display for ObjectName { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ObjectNamePart { + /// A single identifier part, e.g. 
`schema` or `table`. Identifier(Ident), + /// A function that returns an identifier (dialect-specific). Function(ObjectNamePartFunction), } impl ObjectNamePart { + /// Return the identifier if this is an `Identifier` variant. pub fn as_ident(&self) -> Option<&Ident> { match self { ObjectNamePart::Identifier(ident) => Some(ident), @@ -385,7 +410,9 @@ impl fmt::Display for ObjectNamePart { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ObjectNamePartFunction { + /// The function name that produces the object name part. pub name: Ident, + /// Function arguments used to compute the identifier. pub args: Vec, } @@ -432,14 +459,17 @@ impl fmt::Display for Array { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Interval { + /// The interval value expression (commonly a string literal). pub value: Box, + /// Optional leading time unit (e.g., `HOUR`, `MINUTE`). pub leading_field: Option, + /// Optional leading precision for the leading field. pub leading_precision: Option, + /// Optional trailing time unit for a range (e.g., `SECOND`). pub last_field: Option, - /// The seconds precision can be specified in SQL source as - /// `INTERVAL '__' SECOND(_, x)` (in which case the `leading_field` - /// will be `Second` and the `last_field` will be `None`), - /// or as `__ TO SECOND(x)`. + /// The fractional seconds precision, when specified. + /// + /// See SQL `SECOND(n)` or `SECOND(m, n)` forms. pub fractional_seconds_precision: Option, } @@ -491,9 +521,11 @@ impl fmt::Display for Interval { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct StructField { + /// Optional name of the struct field. pub field_name: Option, + /// The field data type. pub field_type: DataType, - /// Struct field options. 
+ /// Struct field options (e.g., `OPTIONS(...)` on BigQuery). /// See [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#column_name_and_column_schema) pub options: Option>, } @@ -520,7 +552,9 @@ impl fmt::Display for StructField { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UnionField { + /// Name of the union field. pub field_name: Ident, + /// Type of the union field. pub field_type: DataType, } @@ -537,7 +571,9 @@ impl fmt::Display for UnionField { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DictionaryField { + /// Dictionary key identifier. pub key: Ident, + /// Value expression for the dictionary entry. pub value: Box, } @@ -552,6 +588,7 @@ impl fmt::Display for DictionaryField { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Map { + /// Entries of the map as key/value pairs. pub entries: Vec, } @@ -568,7 +605,9 @@ impl Display for Map { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct MapEntry { + /// Key expression of the map entry. pub key: Box, + /// Value expression of the map entry. pub value: Box, } @@ -584,8 +623,10 @@ impl fmt::Display for MapEntry { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CastFormat { - Value(Value), - ValueAtTimeZone(Value, Value), + /// A simple cast format specified by a `Value`. + Value(ValueWithSpan), + /// A cast format with an explicit time zone: `(format, timezone)`. + ValueAtTimeZone(ValueWithSpan, ValueWithSpan), } /// An element of a JSON path. @@ -596,7 +637,12 @@ pub enum JsonPathElem { /// Accesses an object field using dot notation, e.g. 
`obj:foo.bar.baz`. /// /// See . - Dot { key: String, quoted: bool }, + Dot { + /// The object key text (without quotes). + key: String, + /// `true` when the key was quoted in the source. + quoted: bool, + }, /// Accesses an object field or array element using bracket notation, /// e.g. `obj['foo']`. /// @@ -604,7 +650,18 @@ pub enum JsonPathElem { /// former is case-insensitive but the latter is not. /// /// See . - Bracket { key: Expr }, + Bracket { + /// The expression used as the bracket key (string or numeric expression). + key: Expr, + }, + /// Access an object field using colon bracket notation + /// e.g. `obj:['foo']` + /// + /// See + ColonBracket { + /// The expression used as the bracket key (string or numeric expression). + key: Expr, + }, /// Accesses all elements in the given (generally array) element. Used for /// constructs like `foo:bar[*].baz`. /// @@ -624,6 +681,7 @@ pub struct JsonPath { /// `a['b']`, whereas others (e.g. Databricks) require the colon even in this case /// (so `a:['b']`). pub has_colon: bool, + /// Sequence of path elements that form the JSON path. pub path: Vec, } @@ -651,6 +709,9 @@ impl fmt::Display for JsonPath { JsonPathElem::AllElements => { write!(f, "[*]")?; } + JsonPathElem::ColonBracket { key } => { + write!(f, ":[{key}]")?; + } } } Ok(()) @@ -733,7 +794,7 @@ pub enum CeilFloorKind { /// `CEIL( TO )` DateTimeField(DateTimeField), /// `CEIL( [, ])` - Scale(Value), + Scale(ValueWithSpan), } /// A WHEN clause in a CASE expression containing both @@ -742,7 +803,9 @@ pub enum CeilFloorKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CaseWhen { + /// The `WHEN` condition expression. pub condition: Expr, + /// The expression returned when `condition` matches. 
pub result: Expr, } @@ -805,7 +868,9 @@ pub enum Expr { /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]` /// - If a struct access likes `a.field1.field2`, it will be represented by CompoundIdentifier([a, field1, field2]) CompoundFieldAccess { + /// The base expression being accessed. root: Box, + /// Sequence of access operations (subscript or identifier accesses). access_chain: Vec, }, /// Access data nested in a value containing semi-structured data, such as @@ -841,102 +906,148 @@ pub enum Expr { IsNotDistinctFrom(Box, Box), /// ` IS [ NOT ] [ form ] NORMALIZED` IsNormalized { + /// Expression being tested. expr: Box, + /// Optional normalization `form` (e.g., NFC, NFD). form: Option, + /// `true` when `NOT` is present. negated: bool, }, /// `[ NOT ] IN (val1, val2, ...)` InList { + /// Left-hand expression to test for membership. expr: Box, + /// Literal list of expressions to check against. list: Vec, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// `[ NOT ] IN (SELECT ...)` InSubquery { + /// Left-hand expression to test for membership. expr: Box, + /// The subquery providing the candidate values. subquery: Box, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// XXX not valid SQL syntax, this is a hack needed to support parameter substitution /// `[ NOT ] IN ` InExpr { + /// Left-hand expression to test for membership. expr: Box, + /// The expression providing the candidate values (used for parameter substitution). in_expr: Box, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// `[ NOT ] IN UNNEST(array_expression)` InUnnest { + /// Left-hand expression to test for membership. expr: Box, + /// Array expression being unnested. array_expr: Box, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// ` [ NOT ] BETWEEN AND ` Between { + /// Expression being compared. expr: Box, + /// `true` when the `NOT` modifier is present. 
negated: bool, + /// Lower bound. low: Box, + /// Upper bound. high: Box, }, /// Binary operation e.g. `1 + 1` or `foo > bar` BinaryOp { + /// Left operand. left: Box, + /// Operator between operands. op: BinaryOperator, + /// Right operand. right: Box, }, /// `[NOT] LIKE [ESCAPE ]` Like { + /// `true` when `NOT` is present. negated: bool, - // Snowflake supports the ANY keyword to match against a list of patterns - // https://docs.snowflake.com/en/sql-reference/functions/like_any + /// Snowflake supports the ANY keyword to match against a list of patterns + /// any: bool, + /// Expression to match. expr: Box, + /// Pattern expression. pattern: Box, - escape_char: Option, + /// Optional escape character. + escape_char: Option, }, /// `ILIKE` (case-insensitive `LIKE`) ILike { + /// `true` when `NOT` is present. negated: bool, - // Snowflake supports the ANY keyword to match against a list of patterns - // https://docs.snowflake.com/en/sql-reference/functions/like_any + /// Snowflake supports the ANY keyword to match against a list of patterns + /// any: bool, + /// Expression to match. expr: Box, + /// Pattern expression. pattern: Box, - escape_char: Option, + /// Optional escape character. + escape_char: Option, }, - /// SIMILAR TO regex + /// `SIMILAR TO` regex SimilarTo { + /// `true` when `NOT` is present. negated: bool, + /// Expression to test. expr: Box, + /// Pattern expression. pattern: Box, - escape_char: Option, + /// Optional escape character. + escape_char: Option, }, - /// MySQL: RLIKE regex or REGEXP regex + /// MySQL: `RLIKE` regex or `REGEXP` regex RLike { + /// `true` when `NOT` is present. negated: bool, + /// Expression to test. expr: Box, + /// Pattern expression. pattern: Box, - // true for REGEXP, false for RLIKE (no difference in semantics) + /// true for REGEXP, false for RLIKE (no difference in semantics) regexp: bool, }, /// `ANY` operation e.g. 
`foo > ANY(bar)`, comparison operator is one of `[=, >, <, =>, =<, !=]` /// AnyOp { + /// Left operand. left: Box, + /// Comparison operator. compare_op: BinaryOperator, + /// Right-hand subquery expression. right: Box, - // ANY and SOME are synonymous: https://docs.cloudera.com/cdw-runtime/cloud/using-hiveql/topics/hive_comparison_predicates.html + /// ANY and SOME are synonymous: is_some: bool, }, /// `ALL` operation e.g. `foo > ALL(bar)`, comparison operator is one of `[=, >, <, =>, =<, !=]` /// AllOp { + /// Left operand. left: Box, + /// Comparison operator. compare_op: BinaryOperator, + /// Right-hand subquery expression. right: Box, }, + /// Unary operation e.g. `NOT foo` UnaryOp { + /// The unary operator (e.g., `NOT`, `-`). op: UnaryOperator, + /// Operand expression. expr: Box, }, /// CONVERT a value to a different data type or character encoding. e.g. `CONVERT(foo USING utf8mb4)` @@ -944,13 +1055,13 @@ pub enum Expr { /// CONVERT (false) or TRY_CONVERT (true) /// is_try: bool, - /// The expression to convert + /// The expression to convert. expr: Box, - /// The target data type + /// The target data type, if provided. data_type: Option, - /// The target character encoding + /// Optional target character encoding (e.g., `utf8mb4`). charset: Option, - /// whether the target comes before the expr (MSSQL syntax) + /// `true` when target precedes the value (MSSQL syntax). target_before_value: bool, /// How to translate the expression. /// @@ -959,9 +1070,18 @@ pub enum Expr { }, /// `CAST` an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` Cast { + /// The cast kind (e.g., `CAST`, `TRY_CAST`). kind: CastKind, + /// Expression being cast. expr: Box, + /// Target data type. data_type: DataType, + /// [MySQL] allows CAST(... AS type ARRAY) in functional index definitions for InnoDB + /// multi-valued indices. It's not really a datatype, and is only allowed in `CAST` in key + /// specifications, so it's a flag here. 
+ /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html#function_cast + array: bool, /// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery] /// /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax @@ -969,7 +1089,9 @@ pub enum Expr { }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { + /// Timestamp expression to shift. timestamp: Box, + /// Time zone expression to apply. time_zone: Box, }, /// Extract a field from a timestamp e.g. `EXTRACT(MONTH FROM foo)` @@ -980,8 +1102,11 @@ pub enum Expr { /// EXTRACT(DateTimeField FROM ) | EXTRACT(DateTimeField, ) /// ``` Extract { + /// Which datetime field is being extracted. field: DateTimeField, + /// Syntax variant used (`From` or `Comma`). syntax: ExtractSyntax, + /// Expression to extract from. expr: Box, }, /// ```sql @@ -991,7 +1116,9 @@ pub enum Expr { /// CEIL( [, ] ) /// ``` Ceil { + /// Expression to ceil. expr: Box, + /// The CEIL/FLOOR kind (datetime field or scale). field: CeilFloorKind, }, /// ```sql @@ -1001,14 +1128,18 @@ pub enum Expr { /// FLOOR( [, ] ) /// Floor { + /// Expression to floor. expr: Box, + /// The CEIL/FLOOR kind (datetime field or scale). field: CeilFloorKind, }, /// ```sql /// POSITION( in ) /// ``` Position { + /// Expression to search for. expr: Box, + /// Expression to search in. r#in: Box, }, /// ```sql @@ -1019,8 +1150,11 @@ pub enum Expr { /// SUBSTRING(, , ) /// ``` Substring { + /// Source expression. expr: Box, + /// Optional `FROM` expression. substring_from: Option>, + /// Optional `FOR` expression. 
substring_for: Option>, /// false if the expression is represented using the `SUBSTRING(expr [FROM start] [FOR len])` syntax @@ -1035,27 +1169,36 @@ pub enum Expr { /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) /// TRIM() - /// TRIM(, [, characters]) -- only Snowflake or Bigquery + /// TRIM(, [, characters]) -- PostgreSQL, DuckDB, Snowflake, BigQuery, Generic /// ``` Trim { - expr: Box, - // ([BOTH | LEADING | TRAILING] + /// Which side to trim: `BOTH`, `LEADING`, or `TRAILING`. trim_where: Option, + /// Optional expression specifying what to trim from the value `expr`. trim_what: Option>, + /// The expression to trim from. + expr: Box, + /// Optional list of characters to trim (dialect-specific). trim_characters: Option>, }, /// ```sql /// OVERLAY( PLACING FROM [ FOR ] /// ``` Overlay { + /// The target expression being overlayed. expr: Box, + /// The expression to place into the target. overlay_what: Box, + /// The `FROM` position expression indicating where to start overlay. overlay_from: Box, + /// Optional `FOR` length expression limiting the overlay span. overlay_for: Option>, }, /// `expr COLLATE collation` Collate { + /// The expression being collated. expr: Box, + /// The collation name to apply to the expression. collation: ObjectName, }, /// Nested expression e.g. `(foo > bar)` or `(1)` @@ -1066,8 +1209,9 @@ pub enum Expr { /// /// Prefixed { + /// The prefix identifier (introducer or projection prefix). prefix: Ident, - /// The value of the constant. + /// The value expression being prefixed. /// Hint: you can unwrap the string value using `value.into_string()`. value: Box, }, @@ -1083,16 +1227,23 @@ pub enum Expr { /// not `< 0` nor `1, 2, 3` as allowed in a `` per /// Case { + /// The attached `CASE` token (keeps original spacing/comments). case_token: AttachedToken, + /// The attached `END` token (keeps original spacing/comments). end_token: AttachedToken, + /// Optional operand expression after `CASE` (for simple CASE). 
operand: Option>, + /// The `WHEN ... THEN` conditions and results. conditions: Vec, + /// Optional `ELSE` result expression. else_result: Option>, }, /// An exists expression `[ NOT ] EXISTS(SELECT ...)`, used in expressions like /// `WHERE [ NOT ] EXISTS (SELECT ...)`. Exists { + /// The subquery checked by `EXISTS`. subquery: Box, + /// Whether the `EXISTS` is negated (`NOT EXISTS`). negated: bool, }, /// A parenthesized subquery `(SELECT ...)`, used in expression like @@ -1128,7 +1279,9 @@ pub enum Expr { /// ``` /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Named { + /// The expression being named. expr: Box, + /// The assigned identifier name for the expression. name: Ident, }, /// `DuckDB` specific `Struct` literal expression [1] @@ -1165,10 +1318,11 @@ pub enum Expr { /// `(, , ...)`. columns: Vec, /// ``. - match_value: Value, + match_value: ValueWithSpan, /// `` opt_search_modifier: Option, }, + /// An unqualified `*` wildcard token (e.g. `*`). Wildcard(AttachedToken), /// Qualified wildcard, e.g. `alias.*` or `schema.table.*`. /// (Same caveats apply to `QualifiedWildcard` as to `Wildcard`.) @@ -1218,7 +1372,10 @@ impl Expr { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Subscript { /// Accesses the element of the array at the given index. - Index { index: Expr }, + Index { + /// The index expression used to access the array element. + index: Expr, + }, /// Accesses a slice of an array on PostgreSQL, e.g. /// @@ -1242,8 +1399,11 @@ pub enum Subscript { /// {1,3,5} /// ``` Slice { + /// Optional lower bound for the slice (inclusive). lower_bound: Option, + /// Optional upper bound for the slice (inclusive). upper_bound: Option, + /// Optional stride for the slice (step size). stride: Option, }, } @@ -1301,17 +1461,71 @@ impl fmt::Display for AccessExpr { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct LambdaFunction { /// The parameters to the lambda function. 
- pub params: OneOrManyWithParens, + pub params: OneOrManyWithParens, /// The body of the lambda function. pub body: Box, + /// The syntax style used to write the lambda function. + pub syntax: LambdaSyntax, } impl fmt::Display for LambdaFunction { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} -> {}", self.params, self.body) + match self.syntax { + LambdaSyntax::Arrow => write!(f, "{} -> {}", self.params, self.body), + LambdaSyntax::LambdaKeyword => { + // For lambda keyword syntax, display params without parentheses + // e.g., `lambda x, y : expr` not `lambda (x, y) : expr` + write!(f, "lambda ")?; + match &self.params { + OneOrManyWithParens::One(p) => write!(f, "{p}")?, + OneOrManyWithParens::Many(ps) => write!(f, "{}", display_comma_separated(ps))?, + }; + write!(f, " : {}", self.body) + } + } + } +} + +/// A parameter to a lambda function, optionally with a data type. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct LambdaFunctionParameter { + /// The name of the parameter + pub name: Ident, + /// The optional data type of the parameter + /// [Snowflake Syntax](https://docs.snowflake.com/en/sql-reference/functions/filter#arguments) + pub data_type: Option, +} + +impl fmt::Display for LambdaFunctionParameter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.data_type { + Some(dt) => write!(f, "{} {}", self.name, dt), + None => write!(f, "{}", self.name), + } } } +/// The syntax style for a lambda function. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Copy)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum LambdaSyntax { + /// Arrow syntax: `param -> expr` or `(param1, param2) -> expr` + /// + /// + /// + /// Supported, but deprecated in DuckDB: + /// + Arrow, + /// Lambda keyword syntax: `lambda param : expr` or `lambda param1, param2 : expr` + /// + /// Recommended in DuckDB: + /// + LambdaKeyword, +} + /// Encapsulates the common pattern in SQL where either one unparenthesized item /// such as an identifier or expression is permitted, or multiple of the same /// item in a parenthesized list. For accessing items regardless of the form, @@ -1746,14 +1960,18 @@ impl fmt::Display for Expr { kind, expr, data_type, + array, format, } => match kind { CastKind::Cast => { + write!(f, "CAST({expr} AS {data_type}")?; + if *array { + write!(f, " ARRAY")?; + } if let Some(format) = format { - write!(f, "CAST({expr} AS {data_type} FORMAT {format})") - } else { - write!(f, "CAST({expr} AS {data_type})") + write!(f, " FORMAT {format}")?; } + write!(f, ")") } CastKind::TryCast => { if let Some(format) = format { @@ -2004,11 +2222,21 @@ impl fmt::Display for Expr { } } +/// The type of a window used in `OVER` clauses. +/// +/// A window can be either an inline specification (`WindowSpec`) or a +/// reference to a previously defined named window. +/// +/// - `WindowSpec(WindowSpec)`: An inline window specification, e.g. +/// `OVER (PARTITION BY ... ORDER BY ...)`. +/// - `NamedWindow(Ident)`: A reference to a named window declared elsewhere. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum WindowType { + /// An inline window specification. WindowSpec(WindowSpec), + /// A reference to a previously defined named window. 
NamedWindow(Ident), } @@ -2103,7 +2331,9 @@ impl fmt::Display for WindowSpec { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WindowFrame { + /// Units for the frame (e.g. `ROWS`, `RANGE`, `GROUPS`). pub units: WindowFrameUnits, + /// The start bound of the window frame. pub start_bound: WindowFrameBound, /// The right bound of the `BETWEEN .. AND` clause. The end bound of `None` /// indicates the shorthand form (e.g. `ROWS 1 PRECEDING`), which must @@ -2128,9 +2358,13 @@ impl Default for WindowFrame { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Units used to describe the window frame scope. pub enum WindowFrameUnits { + /// `ROWS` unit. Rows, + /// `RANGE` unit. Range, + /// `GROUPS` unit. Groups, } @@ -2150,8 +2384,11 @@ impl fmt::Display for WindowFrameUnits { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// How NULL values are treated in certain window functions. pub enum NullTreatment { + /// Ignore NULL values (e.g. `IGNORE NULLS`). IgnoreNulls, + /// Respect NULL values (e.g. `RESPECT NULLS`). RespectNulls, } @@ -2192,9 +2429,13 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Indicates partition operation type for partition management statements. pub enum AddDropSync { + /// Add partitions. ADD, + /// Drop partitions. DROP, + /// Sync partitions. 
SYNC, } @@ -2211,12 +2452,19 @@ impl fmt::Display for AddDropSync { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Object kinds supported by `SHOW CREATE` statements. pub enum ShowCreateObject { + /// An event object for `SHOW CREATE EVENT`. Event, + /// A function object for `SHOW CREATE FUNCTION`. Function, + /// A procedure object for `SHOW CREATE PROCEDURE`. Procedure, + /// A table object for `SHOW CREATE TABLE`. Table, + /// A trigger object for `SHOW CREATE TRIGGER`. Trigger, + /// A view object for `SHOW CREATE VIEW`. View, } @@ -2236,26 +2484,58 @@ impl fmt::Display for ShowCreateObject { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Objects that can be targeted by a `COMMENT` statement. pub enum CommentObject { + /// A table column. Column, - Table, + /// A database. + Database, + /// A domain. + Domain, + /// An extension. Extension, + /// A function. + Function, + /// An index. + Index, + /// A materialized view. + MaterializedView, + /// A procedure. + Procedure, + /// A role. + Role, + /// A schema. Schema, - Database, + /// A sequence. + Sequence, + /// A table. + Table, + /// A type. + Type, + /// A user. User, - Role, + /// A view. 
+ View, } impl fmt::Display for CommentObject { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { CommentObject::Column => f.write_str("COLUMN"), - CommentObject::Table => f.write_str("TABLE"), + CommentObject::Database => f.write_str("DATABASE"), + CommentObject::Domain => f.write_str("DOMAIN"), CommentObject::Extension => f.write_str("EXTENSION"), + CommentObject::Function => f.write_str("FUNCTION"), + CommentObject::Index => f.write_str("INDEX"), + CommentObject::MaterializedView => f.write_str("MATERIALIZED VIEW"), + CommentObject::Procedure => f.write_str("PROCEDURE"), + CommentObject::Role => f.write_str("ROLE"), CommentObject::Schema => f.write_str("SCHEMA"), - CommentObject::Database => f.write_str("DATABASE"), + CommentObject::Sequence => f.write_str("SEQUENCE"), + CommentObject::Table => f.write_str("TABLE"), + CommentObject::Type => f.write_str("TYPE"), CommentObject::User => f.write_str("USER"), - CommentObject::Role => f.write_str("ROLE"), + CommentObject::View => f.write_str("VIEW"), } } } @@ -2263,8 +2543,11 @@ impl fmt::Display for CommentObject { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Password specification variants used in user-related statements. pub enum Password { + /// A concrete password expression. Password(Expr), + /// Represents a `NULL` password. NullPassword, } @@ -2290,8 +2573,11 @@ pub enum Password { pub struct CaseStatement { /// The `CASE` token that starts the statement. pub case_token: AttachedToken, + /// Optional expression to match against in `CASE ... WHEN`. pub match_expr: Option, + /// The `WHEN ... THEN` blocks of the `CASE` statement. pub when_blocks: Vec, + /// Optional `ELSE` block for the `CASE` statement. pub else_block: Option, /// The last token of the statement (`END` or `CASE`). 
pub end_case_token: AttachedToken, @@ -2358,9 +2644,13 @@ impl fmt::Display for CaseStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IfStatement { + /// The initial `IF` block containing the condition and statements. pub if_block: ConditionalStatementBlock, + /// Additional `ELSEIF` blocks. pub elseif_blocks: Vec, + /// Optional `ELSE` block. pub else_block: Option, + /// Optional trailing `END` token for the `IF` statement. pub end_token: Option, } @@ -2406,6 +2696,7 @@ impl fmt::Display for IfStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WhileStatement { + /// Block executed while the condition holds. pub while_block: ConditionalStatementBlock, } @@ -2445,13 +2736,18 @@ impl fmt::Display for WhileStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ConditionalStatementBlock { + /// Token representing the start of the block (e.g., WHEN/IF/WHILE). pub start_token: AttachedToken, + /// Optional condition expression for the block. pub condition: Option, + /// Optional token for the `THEN` keyword. pub then_token: Option, + /// The statements contained in this conditional block. pub conditional_statements: ConditionalStatements, } impl ConditionalStatementBlock { + /// Get the statements in this conditional block. pub fn statements(&self) -> &Vec { self.conditional_statements.statements() } @@ -2488,14 +2784,19 @@ impl fmt::Display for ConditionalStatementBlock { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Statements used inside conditional blocks (`IF`, `WHEN`, `WHILE`). pub enum ConditionalStatements { - /// SELECT 1; SELECT 2; SELECT 3; ... 
- Sequence { statements: Vec }, - /// BEGIN SELECT 1; SELECT 2; SELECT 3; ... END + /// Simple sequence of statements (no `BEGIN`/`END`). + Sequence { + /// The statements in the sequence. + statements: Vec, + }, + /// Block enclosed by `BEGIN` and `END`. BeginEnd(BeginEndStatements), } impl ConditionalStatements { + /// Get the statements in this conditional statements block. pub fn statements(&self) -> &Vec { match self { ConditionalStatements::Sequence { statements } => statements, @@ -2530,8 +2831,11 @@ impl fmt::Display for ConditionalStatements { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct BeginEndStatements { + /// Token representing the `BEGIN` keyword (may include span info). pub begin_token: AttachedToken, + /// Statements contained within the block. pub statements: Vec, + /// Token representing the `END` keyword (may include span info). pub end_token: AttachedToken, } @@ -2571,6 +2875,7 @@ impl fmt::Display for BeginEndStatements { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct RaiseStatement { + /// Optional value provided to the RAISE statement. pub value: Option, } @@ -2607,6 +2912,41 @@ impl fmt::Display for RaiseStatementValue { } } +/// A MSSQL `THROW` statement. +/// +/// ```sql +/// THROW [ error_number, message, state ] +/// ``` +/// +/// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/language-elements/throw-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ThrowStatement { + /// Error number expression. + pub error_number: Option>, + /// Error message expression. + pub message: Option>, + /// State expression. 
+ pub state: Option>, +} + +impl fmt::Display for ThrowStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ThrowStatement { + error_number, + message, + state, + } = self; + + write!(f, "THROW")?; + if let (Some(error_number), Some(message), Some(state)) = (error_number, message, state) { + write!(f, " {error_number}, {message}, {state}")?; + } + Ok(()) + } +} + /// Represents an expression assignment within a variable `DECLARE` statement. /// /// Examples: @@ -2828,33 +3168,18 @@ impl fmt::Display for Declare { #[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options allowed within a `CREATE TABLE` statement. pub enum CreateTableOptions { + /// No options specified. #[default] None, - /// Options specified using the `WITH` keyword. - /// e.g. `WITH (description = "123")` - /// - /// - /// - /// MSSQL supports more specific options that's not only key-value pairs. - /// - /// WITH ( - /// DISTRIBUTION = ROUND_ROBIN, - /// CLUSTERED INDEX (column_a DESC, column_b) - /// ) - /// - /// + /// Options specified using the `WITH` keyword, e.g. `WITH (k = v)`. With(Vec), - /// Options specified using the `OPTIONS` keyword. - /// e.g. `OPTIONS(description = "123")` - /// - /// + /// Options specified using the `OPTIONS(...)` clause. Options(Vec), - - /// Plain options, options which are not part on any declerative statement e.g. WITH/OPTIONS/... - /// + /// Plain space-separated options. Plain(Vec), - + /// Table properties (e.g., TBLPROPERTIES / storage properties). TableProperties(Vec), } @@ -2907,56 +3232,40 @@ impl Display for FromTable { } } -/// Policy type for a `CREATE POLICY` statement. 
-/// ```sql -/// AS [ PERMISSIVE | RESTRICTIVE ] -/// ``` -/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum CreatePolicyType { - Permissive, - Restrictive, -} - -/// Policy command for a `CREATE POLICY` statement. -/// ```sql -/// FOR [ALL | SELECT | INSERT | UPDATE | DELETE] -/// ``` -/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum CreatePolicyCommand { - All, - Select, - Insert, - Update, - Delete, -} - #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Variants for the `SET` family of statements. pub enum Set { /// SQL Standard-style /// SET a = 1; + /// `SET var = value` (standard SQL-style assignment). SingleAssignment { + /// Optional scope modifier (`SESSION` / `LOCAL`). scope: Option, + /// Whether this is a Hive-style `HIVEVAR:` assignment. hivevar: bool, + /// Variable name to assign. variable: ObjectName, + /// Values assigned to the variable. values: Vec, }, /// Snowflake-style /// SET (a, b, ..) = (1, 2, ..); + /// `SET (a, b) = (1, 2)` (tuple assignment syntax). ParenthesizedAssignments { + /// Variables being assigned in tuple form. variables: Vec, + /// Corresponding values for the variables. values: Vec, }, /// MySQL-style /// SET a = 1, b = 2, ..; - MultipleAssignments { assignments: Vec }, + /// `SET a = 1, b = 2` (MySQL-style comma-separated assignments). + MultipleAssignments { + /// List of `SET` assignments (MySQL-style comma-separated). 
+ assignments: Vec, + }, /// Session authorization for Postgres/Redshift /// /// ```sql @@ -2993,12 +3302,21 @@ pub enum Set { /// Note: this is a PostgreSQL-specific statements /// `SET TIME ZONE ` is an alias for `SET timezone TO ` in PostgreSQL /// However, we allow it for all dialects. - SetTimeZone { local: bool, value: Expr }, + /// `SET TIME ZONE` statement. `local` indicates the `LOCAL` keyword. + /// `SET TIME ZONE ` statement. + SetTimeZone { + /// Whether the `LOCAL` keyword was specified. + local: bool, + /// Time zone expression value. + value: Expr, + }, /// ```sql /// SET NAMES 'charset_name' [COLLATE 'collation_name'] /// ``` SetNames { + /// Character set name to set. charset_name: Ident, + /// Optional collation name. collation_name: Option, }, /// ```sql @@ -3011,8 +3329,11 @@ pub enum Set { /// SET TRANSACTION ... /// ``` SetTransaction { + /// Transaction modes (e.g., ISOLATION LEVEL, READ ONLY). modes: Vec, - snapshot: Option, + /// Optional snapshot value for transaction snapshot control. + snapshot: Option, + /// `true` when the `SESSION` keyword was used. session: bool, }, } @@ -3113,7 +3434,9 @@ impl Display for Set { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ExceptionWhen { + /// Identifiers that trigger this branch (error conditions). pub idents: Vec, + /// Statements to execute when the condition matches. 
pub statements: Vec, } @@ -3134,40 +3457,52 @@ impl Display for ExceptionWhen { } } -/// ANALYZE TABLE statement (Hive-specific) +/// ANALYZE statement +/// +/// Supported syntax varies by dialect: +/// - Hive: `ANALYZE TABLE t [PARTITION (...)] COMPUTE STATISTICS [NOSCAN] [FOR COLUMNS [col1, ...]] [CACHE METADATA]` +/// - PostgreSQL: `ANALYZE [VERBOSE] [t [(col1, ...)]]` See +/// - General: `ANALYZE [TABLE] t` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Analyze { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - pub table_name: ObjectName, + /// Name of the table to analyze. `None` for bare `ANALYZE`. + pub table_name: Option, + /// Optional partition expressions to restrict the analysis. pub partitions: Option>, + /// `true` when analyzing specific columns (Hive `FOR COLUMNS` syntax). pub for_columns: bool, + /// Columns to analyze. pub columns: Vec, + /// Whether to cache metadata before analyzing. pub cache_metadata: bool, + /// Whether to skip scanning the table. pub noscan: bool, + /// Whether to compute statistics during analysis. pub compute_statistics: bool, + /// Whether the `TABLE` keyword was present. 
pub has_table_keyword: bool, } impl fmt::Display for Analyze { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "ANALYZE{}{table_name}", + write!(f, "ANALYZE")?; + if let Some(ref table_name) = self.table_name { if self.has_table_keyword { - " TABLE " - } else { - " " - }, - table_name = self.table_name - )?; + write!(f, " TABLE")?; + } + write!(f, " {table_name}")?; + } + if !self.for_columns && !self.columns.is_empty() { + write!(f, " ({})", display_comma_separated(&self.columns))?; + } if let Some(ref parts) = self.partitions { if !parts.is_empty() { write!(f, " PARTITION ({})", display_comma_separated(parts))?; } } - if self.compute_statistics { write!(f, " COMPUTE STATISTICS")?; } @@ -3202,6 +3537,7 @@ pub enum Statement { /// ``` /// Analyze (Hive) Analyze(Analyze), + /// `SET` statements (session, transaction, timezone, etc.). Set(Set), /// ```sql /// TRUNCATE @@ -3236,11 +3572,17 @@ pub enum Statement { extension_name: Ident, }, // TODO: Support ROW FORMAT + /// LOAD DATA from a directory or query source. Directory { + /// Whether to overwrite existing files. overwrite: bool, + /// Whether the directory is local to the server. local: bool, + /// Path to the directory or files. path: String, + /// Optional file format for the data. file_format: Option, + /// Source query providing data to load. source: Box, }, /// A `CASE` statement. @@ -3284,19 +3626,33 @@ pub enum Statement { /// in different enums. This can be refactored later once custom dialects /// are allowed to have custom Statements. CopyIntoSnowflake { + /// Kind of COPY INTO operation (table or location). kind: CopyIntoSnowflakeKind, + /// Target object for the COPY INTO operation. into: ObjectName, + /// Optional list of target columns. into_columns: Option>, + /// Optional source object name (staged data). from_obj: Option, + /// Optional alias for the source object. from_obj_alias: Option, + /// Stage-specific parameters (e.g., credentials, path). 
stage_params: StageParamsObject, + /// Optional list of transformations applied when loading. from_transformations: Option>, + /// Optional source query instead of a staged object. from_query: Option>, + /// Optional list of specific file names to load. files: Option>, + /// Optional filename matching pattern. pattern: Option, + /// File format options. file_format: KeyValueOptions, + /// Additional copy options. copy_options: KeyValueOptions, + /// Optional validation mode string. validation_mode: Option, + /// Optional partition expression for loading. partition: Option>, }, /// ```sql @@ -3334,9 +3690,13 @@ pub enum Statement { /// Sqlite specific statement CreateVirtualTable { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + /// Name of the virtual table module instance. name: ObjectName, + /// `true` when `IF NOT EXISTS` was specified. if_not_exists: bool, + /// Module name used by the virtual table. module_name: Ident, + /// Arguments passed to the module. module_args: Vec, }, /// ```sql @@ -3353,12 +3713,19 @@ pub enum Statement { /// ``` /// See [DuckDB](https://duckdb.org/docs/sql/statements/create_secret.html) CreateSecret { + /// `true` when `OR REPLACE` was specified. or_replace: bool, + /// Optional `TEMPORARY` flag. temporary: Option, + /// `true` when `IF NOT EXISTS` was present. if_not_exists: bool, + /// Optional secret name. name: Option, + /// Optional storage specifier identifier. storage_specifier: Option, + /// The secret type identifier. secret_type: Ident, + /// Additional secret options. options: Vec, }, /// A `CREATE SERVER` statement. 
@@ -3367,16 +3734,7 @@ pub enum Statement { /// CREATE POLICY /// ``` /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) - CreatePolicy { - name: Ident, - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - table_name: ObjectName, - policy_type: Option, - command: Option, - to: Option>, - using: Option, - with_check: Option, - }, + CreatePolicy(CreatePolicy), /// ```sql /// CREATE CONNECTOR /// ``` @@ -3410,18 +3768,23 @@ pub enum Statement { /// ALTER INDEX /// ``` AlterIndex { + /// Name of the index to alter. name: ObjectName, + /// The operation to perform on the index. operation: AlterIndexOperation, }, /// ```sql /// ALTER VIEW /// ``` AlterView { - /// View name + /// View name being altered. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] name: ObjectName, + /// Optional new column list for the view. columns: Vec, + /// Replacement query for the view definition. query: Box, + /// Additional WITH options for the view. with_options: Vec, }, /// ```sql @@ -3430,22 +3793,34 @@ pub enum Statement { /// ``` AlterType(AlterType), /// ```sql + /// ALTER OPERATOR + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteroperator.html) + AlterOperator(AlterOperator), + /// ```sql + /// ALTER OPERATOR FAMILY + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropfamily.html) + AlterOperatorFamily(AlterOperatorFamily), + /// ```sql + /// ALTER OPERATOR CLASS + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropclass.html) + AlterOperatorClass(AlterOperatorClass), + /// ```sql /// ALTER ROLE /// ``` AlterRole { + /// Role name being altered. name: Ident, + /// Operation to perform on the role. operation: AlterRoleOperation, }, /// ```sql /// ALTER POLICY ON
[] /// ``` /// (Postgresql-specific) - AlterPolicy { - name: Ident, - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - table_name: ObjectName, - operation: AlterPolicyOperation, - }, + AlterPolicy(AlterPolicy), /// ```sql /// ALTER CONNECTOR connector_name SET DCPROPERTIES(property_name=property_value, ...); /// or @@ -3455,9 +3830,13 @@ pub enum Statement { /// ``` /// (Hive-specific) AlterConnector { + /// Name of the connector to alter. name: Ident, + /// Optional connector properties to set. properties: Option>, + /// Optional new URL for the connector. url: Option, + /// Optional new owner specification. owner: Option, }, /// ```sql @@ -3489,12 +3868,15 @@ pub enum Statement { /// ``` /// See AttachDuckDBDatabase { + /// `true` when `IF NOT EXISTS` was present. if_not_exists: bool, - /// true if the syntax is 'ATTACH DATABASE', false if it's just 'ATTACH' + /// `true` if the syntax used `ATTACH DATABASE` rather than `ATTACH`. database: bool, - /// An expression that indicates the path to the database file + /// The path identifier to the database file being attached. database_path: Ident, + /// Optional alias assigned to the attached database. database_alias: Option, + /// Dialect-specific attach options (e.g., `READ_ONLY`). attach_options: Vec, }, /// (DuckDB-specific) @@ -3503,9 +3885,11 @@ pub enum Statement { /// ``` /// See DetachDuckDBDatabase { + /// `true` when `IF EXISTS` was present. if_exists: bool, - /// true if the syntax is 'DETACH DATABASE', false if it's just 'DETACH' + /// `true` if the syntax used `DETACH DATABASE` rather than `DETACH`. database: bool, + /// Alias of the database to detach. database_alias: Ident, }, /// ```sql @@ -3549,37 +3933,39 @@ pub enum Statement { /// DROP PROCEDURE /// ``` DropProcedure { + /// `true` when `IF EXISTS` was present. if_exists: bool, - /// One or more function to drop + /// One or more functions/procedures to drop. 
proc_desc: Vec, - /// `CASCADE` or `RESTRICT` + /// Optional drop behavior (`CASCADE` or `RESTRICT`). drop_behavior: Option, }, /// ```sql /// DROP SECRET /// ``` DropSecret { + /// `true` when `IF EXISTS` was present. if_exists: bool, + /// Optional `TEMPORARY` marker. temporary: Option, + /// Name of the secret to drop. name: Ident, + /// Optional storage specifier identifier. storage_specifier: Option, }, ///```sql /// DROP POLICY /// ``` /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-droppolicy.html) - DropPolicy { - if_exists: bool, - name: Ident, - table_name: ObjectName, - drop_behavior: Option, - }, + DropPolicy(DropPolicy), /// ```sql /// DROP CONNECTOR /// ``` /// See [Hive](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362034#LanguageManualDDL-DropConnector) DropConnector { + /// `true` when `IF EXISTS` was present. if_exists: bool, + /// Name of the connector to drop. name: Ident, }, /// ```sql @@ -3590,6 +3976,7 @@ pub enum Statement { /// Note: this is a PostgreSQL-specific statement, /// but may also compatible with other SQL. Declare { + /// Cursor declaration statements collected by `DECLARE`. stmts: Vec, }, /// ```sql @@ -3635,9 +4022,11 @@ pub enum Statement { Fetch { /// Cursor name name: Ident, + /// The fetch direction (e.g., `FORWARD`, `BACKWARD`). direction: FetchDirection, + /// The fetch position (e.g., `ALL`, `NEXT`, `ABSOLUTE`). position: FetchPosition, - /// Optional, It's possible to fetch rows form cursor to the table + /// Optional target table to fetch rows into. into: Option, }, /// ```sql @@ -3647,11 +4036,17 @@ pub enum Statement { /// Note: this is a Mysql-specific statement, /// but may also compatible with other SQL. Flush { + /// The specific flush option or object to flush. object_type: FlushType, + /// Optional flush location (dialect-specific). location: Option, + /// Optional channel name used for flush operations. channel: Option, + /// Whether a read lock was requested. 
read_lock: bool, + /// Whether this is an export flush operation. export: bool, + /// Optional list of tables involved in the flush. tables: Vec, }, /// ```sql @@ -3661,12 +4056,14 @@ pub enum Statement { /// Note: this is a PostgreSQL-specific statement, /// but may also compatible with other SQL. Discard { + /// The kind of object(s) to discard (ALL, PLANS, etc.). object_type: DiscardObject, }, /// `SHOW FUNCTIONS` /// /// Note: this is a Presto-specific statement. ShowFunctions { + /// Optional filter for which functions to display. filter: Option, }, /// ```sql @@ -3675,6 +4072,7 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. ShowVariable { + /// Variable name as one or more identifiers. variable: Vec, }, /// ```sql @@ -3683,8 +4081,11 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowStatus { + /// Optional filter for which status entries to display. filter: Option, + /// `true` when `GLOBAL` scope was requested. global: bool, + /// `true` when `SESSION` scope was requested. session: bool, }, /// ```sql @@ -3693,8 +4094,11 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowVariables { + /// Optional filter for which variables to display. filter: Option, + /// `true` when `GLOBAL` scope was requested. global: bool, + /// `true` when `SESSION` scope was requested. session: bool, }, /// ```sql @@ -3703,31 +4107,42 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowCreate { + /// The kind of object being shown (TABLE, VIEW, etc.). obj_type: ShowCreateObject, + /// The name of the object to show create statement for. obj_name: ObjectName, }, /// ```sql /// SHOW COLUMNS /// ``` ShowColumns { + /// `true` when extended column information was requested. extended: bool, + /// `true` when full column details were requested. full: bool, + /// Additional options for `SHOW COLUMNS`. 
show_options: ShowStatementOptions, }, /// ```sql /// SHOW DATABASES /// ``` ShowDatabases { + /// `true` when terse output format was requested. terse: bool, + /// `true` when history information was requested. history: bool, + /// Additional options for `SHOW DATABASES`. show_options: ShowStatementOptions, }, /// ```sql /// SHOW SCHEMAS /// ``` ShowSchemas { + /// `true` when terse (compact) output was requested. terse: bool, + /// `true` when history information was requested. history: bool, + /// Additional options for `SHOW SCHEMAS`. show_options: ShowStatementOptions, }, // ```sql @@ -3735,6 +4150,7 @@ pub enum Statement { // ``` // [MySQL]: // + /// Show the available character sets (alias `CHARSET`). ShowCharset(ShowCharset), /// ```sql /// SHOW OBJECTS LIKE 'line%' IN mydb.public @@ -3746,19 +4162,28 @@ pub enum Statement { /// SHOW TABLES /// ``` ShowTables { + /// `true` when terse output format was requested (compact listing). terse: bool, + /// `true` when history rows are requested. history: bool, + /// `true` when extended information should be shown. extended: bool, + /// `true` when a full listing was requested. full: bool, + /// `true` when external tables should be included. external: bool, + /// Additional options for `SHOW` statements. show_options: ShowStatementOptions, }, /// ```sql /// SHOW VIEWS /// ``` ShowViews { + /// `true` when terse output format was requested. terse: bool, + /// `true` when materialized views should be included. materialized: bool, + /// Additional options for `SHOW` statements. show_options: ShowStatementOptions, }, /// ```sql @@ -3767,6 +4192,7 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowCollation { + /// Optional filter for which collations to display. filter: Option, }, /// ```sql @@ -3783,9 +4209,13 @@ pub enum Statement { /// ``` /// If `begin` is true StartTransaction { + /// Transaction modes such as `ISOLATION LEVEL` or `READ WRITE`. 
modes: Vec, + /// `true` when this was parsed as `BEGIN` instead of `START`. begin: bool, + /// Optional specific keyword used: `TRANSACTION` or `WORK`. transaction: Option, + /// Optional transaction modifier (e.g., `AND NO CHAIN`). modifier: Option, /// List of statements belonging to the `BEGIN` block. /// Example: @@ -3819,8 +4249,11 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. Comment { + /// Type of object being commented (table, column, etc.). object_type: CommentObject, + /// Name of the object the comment applies to. object_name: ObjectName, + /// Optional comment text (None to remove comment). comment: Option, /// An optional `IF EXISTS` clause. (Non-standard.) /// See @@ -3836,15 +4269,20 @@ pub enum Statement { /// ``` /// If `end` is true Commit { + /// `true` when `AND [ NO ] CHAIN` was present. chain: bool, + /// `true` when this `COMMIT` was parsed as an `END` block terminator. end: bool, + /// Optional transaction modifier for commit semantics. modifier: Option, }, /// ```sql /// ROLLBACK [ TRANSACTION | WORK ] [ AND [ NO ] CHAIN ] [ TO [ SAVEPOINT ] savepoint_name ] /// ``` Rollback { + /// `true` when `AND [ NO ] CHAIN` was present. chain: bool, + /// Optional savepoint name to roll back to. savepoint: Option, }, /// ```sql @@ -3853,6 +4291,7 @@ pub enum Statement { CreateSchema { /// ` | AUTHORIZATION | AUTHORIZATION ` schema_name: SchemaName, + /// `true` when `IF NOT EXISTS` was present. if_not_exists: bool, /// Schema properties. /// @@ -3893,25 +4332,49 @@ pub enum Statement { /// See: /// CreateDatabase { + /// Database name. db_name: ObjectName, + /// `IF NOT EXISTS` flag. if_not_exists: bool, + /// Optional location URI. location: Option, + /// Optional managed location. managed_location: Option, + /// `OR REPLACE` flag. or_replace: bool, + /// `TRANSIENT` flag. transient: bool, + /// Optional clone source. clone: Option, + /// Optional data retention time in days. 
data_retention_time_in_days: Option, + /// Optional maximum data extension time in days. max_data_extension_time_in_days: Option, + /// Optional external volume identifier. external_volume: Option, + /// Optional catalog name. catalog: Option, + /// Whether to replace invalid characters. replace_invalid_characters: Option, + /// Default DDL collation string. default_ddl_collation: Option, + /// Storage serialization policy. storage_serialization_policy: Option, + /// Optional comment. comment: Option, + /// Optional default character set (MySQL). + default_charset: Option, + /// Optional default collation (MySQL). + default_collation: Option, + /// Optional catalog sync identifier. catalog_sync: Option, + /// Catalog sync namespace mode. catalog_sync_namespace_mode: Option, + /// Optional flatten delimiter for namespace sync. catalog_sync_namespace_flatten_delimiter: Option, + /// Optional tags for the database. with_tags: Option>, + /// Optional contact entries for the database. with_contacts: Option>, }, /// ```sql @@ -3932,10 +4395,15 @@ pub enum Statement { /// CREATE PROCEDURE /// ``` CreateProcedure { + /// `OR ALTER` flag. or_alter: bool, + /// Procedure name. name: ObjectName, + /// Optional procedure parameters. params: Option>, + /// Optional language identifier. language: Option, + /// Procedure body statements. body: ConditionalStatements, }, /// ```sql @@ -3945,10 +4413,15 @@ pub enum Statement { /// Supported variants: /// 1. [DuckDB](https://duckdb.org/docs/sql/statements/create_macro) CreateMacro { + /// `OR REPLACE` flag. or_replace: bool, + /// Whether macro is temporary. temporary: bool, + /// Macro name. name: ObjectName, + /// Optional macro arguments. args: Option>, + /// Macro definition body. definition: MacroDefinition, }, /// ```sql @@ -3956,35 +4429,38 @@ pub enum Statement { /// ``` /// See CreateStage { + /// `OR REPLACE` flag for stage. or_replace: bool, + /// Whether stage is temporary. temporary: bool, + /// `IF NOT EXISTS` flag. 
if_not_exists: bool, + /// Stage name. name: ObjectName, + /// Stage parameters. stage_params: StageParamsObject, + /// Directory table parameters. directory_table_params: KeyValueOptions, + /// File format options. file_format: KeyValueOptions, + /// Copy options for stage. copy_options: KeyValueOptions, + /// Optional comment. comment: Option, }, /// ```sql /// ASSERT [AS ] /// ``` Assert { + /// Assertion condition expression. condition: Expr, + /// Optional message expression. message: Option, }, /// ```sql /// GRANT privileges ON objects TO grantees /// ``` - Grant { - privileges: Privileges, - objects: Option, - grantees: Vec, - with_grant_option: bool, - as_grantor: Option, - granted_by: Option, - current_grants: Option, - }, + Grant(Grant), /// ```sql /// DENY privileges ON object TO grantees /// ``` @@ -3992,20 +4468,16 @@ pub enum Statement { /// ```sql /// REVOKE privileges ON objects FROM grantees /// ``` - Revoke { - privileges: Privileges, - objects: Option, - grantees: Vec, - granted_by: Option, - cascade: Option, - }, + Revoke(Revoke), /// ```sql /// DEALLOCATE [ PREPARE ] { name | ALL } /// ``` /// /// Note: this is a PostgreSQL-specific statement. Deallocate { + /// Name to deallocate (or `ALL`). name: Ident, + /// Whether `PREPARE` keyword was present. prepare: bool, }, /// ```sql @@ -4017,12 +4489,17 @@ pub enum Statement { /// BigQuery: /// Snowflake: Execute { + /// Optional function/procedure name. name: Option, + /// Parameter expressions passed to execute. parameters: Vec, + /// Whether parentheses were present. has_parentheses: bool, - /// Is this an `EXECUTE IMMEDIATE` + /// Is this an `EXECUTE IMMEDIATE`. immediate: bool, + /// Identifiers to capture results into. into: Vec, + /// `USING` expressions with optional aliases. using: Vec, /// Whether the last parameter is the return value of the procedure /// MSSQL: @@ -4037,8 +4514,11 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. 
Prepare { + /// Name of the prepared statement. name: Ident, + /// Optional data types for parameters. data_types: Vec, + /// Statement being prepared. statement: Box, }, /// ```sql @@ -4048,8 +4528,10 @@ pub enum Statement { /// See /// See Kill { + /// Optional kill modifier (CONNECTION, QUERY, MUTATION). modifier: Option, // processlist_id + /// The id of the process to kill. id: u64, }, /// ```sql @@ -4078,7 +4560,7 @@ pub enum Statement { describe_alias: DescribeAlias, /// Carry out the command and show actual run times and other statistics. analyze: bool, - // Display additional information regarding the plan. + /// Display additional information regarding the plan. verbose: bool, /// `EXPLAIN QUERY PLAN` /// Display the query plan without running the query. @@ -4100,12 +4582,14 @@ pub enum Statement { /// ``` /// Define a new savepoint within the current transaction Savepoint { + /// Name of the savepoint being defined. name: Ident, }, /// ```sql /// RELEASE [ SAVEPOINT ] savepoint_name /// ``` ReleaseSavepoint { + /// Name of the savepoint to release. name: Ident, }, /// A `MERGE` statement. @@ -4116,22 +4600,7 @@ pub enum Statement { /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) /// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/statements/merge-transact-sql?view=sql-server-ver16) - Merge { - /// The `MERGE` token that starts the statement. - merge_token: AttachedToken, - /// optional INTO keyword - into: bool, - /// Specifies the table to merge - table: TableFactor, - /// Specifies the table or subquery to join with the target table - source: TableFactor, - /// Specifies the expression on which to join the target table and source - on: Box, - /// Specifies the actions to perform when values match or do not match. 
- clauses: Vec, - // Specifies the output to save changes in MSSQL - output: Option, - }, + Merge(Merge), /// ```sql /// CACHE [ FLAG ] TABLE [ OPTIONS('K1' = 'V1', 'K2' = V2) ] [ AS ] [ ] /// ``` @@ -4145,6 +4614,7 @@ pub enum Statement { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, + /// `true` if `AS` keyword was present before the query. has_as: bool, /// Table confs options: Vec, @@ -4158,6 +4628,7 @@ pub enum Statement { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, + /// `true` when `IF EXISTS` was present. if_exists: bool, }, /// ```sql @@ -4165,11 +4636,17 @@ pub enum Statement { /// ``` /// Define a new sequence: CreateSequence { + /// Whether the sequence is temporary. temporary: bool, + /// `IF NOT EXISTS` flag. if_not_exists: bool, + /// Sequence name. name: ObjectName, + /// Optional data type for the sequence. data_type: Option, + /// Sequence options (INCREMENT, MINVALUE, etc.). sequence_options: Vec, + /// Optional `OWNED BY` target. owned_by: Option, }, /// A `CREATE DOMAIN` statement. @@ -4178,22 +4655,34 @@ pub enum Statement { /// CREATE TYPE /// ``` CreateType { + /// Type name to create. name: ObjectName, + /// Optional type representation details. representation: Option, }, /// ```sql /// PRAGMA . = /// ``` Pragma { + /// Pragma name (possibly qualified). name: ObjectName, - value: Option, + /// Optional pragma value. + value: Option, + /// Whether the pragma used `=`. is_eq: bool, }, /// ```sql + /// LOCK [ TABLE ] [ ONLY ] name [ * ] [, ...] [ IN lockmode MODE ] [ NOWAIT ] + /// ``` + /// + /// See + Lock(Lock), + /// ```sql /// LOCK TABLES [READ [LOCAL] | [LOW_PRIORITY] WRITE] /// ``` /// Note: this is a MySQL-specific statement. See LockTables { + /// List of tables to lock with modes. 
tables: Vec, }, /// ```sql @@ -4213,24 +4702,53 @@ pub enum Statement { /// UNLOAD('statement') TO [ OPTIONS ] /// ``` Unload { + /// Optional query AST to unload. query: Option>, + /// Optional original query text. query_text: Option, + /// Destination identifier. to: Ident, + /// Optional IAM role/auth information. auth: Option, + /// Additional `WITH` options. with: Vec, + /// Legacy copy-style options. options: Vec, }, + /// ClickHouse: /// ```sql /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] /// ``` - /// /// See ClickHouse + /// + /// Databricks: + /// ```sql + /// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])] + /// ``` + /// See Databricks OptimizeTable { + /// Table name to optimize. name: ObjectName, + /// Whether the `TABLE` keyword was present (ClickHouse uses `OPTIMIZE TABLE`, Databricks uses `OPTIMIZE`). + has_table_keyword: bool, + /// Optional cluster identifier. + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) on_cluster: Option, + /// Optional partition spec. + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) partition: Option, + /// Whether `FINAL` was specified. + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) include_final: bool, + /// Optional deduplication settings. + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) deduplicate: Option, + /// Optional WHERE predicate. + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html) + predicate: Option, + /// Optional ZORDER BY columns. + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html) + zorder: Option>, }, /// ```sql /// LISTEN @@ -4239,6 +4757,7 @@ pub enum Statement { /// /// See Postgres LISTEN { + /// Notification channel identifier. 
channel: Ident, }, /// ```sql @@ -4248,6 +4767,7 @@ pub enum Statement { /// /// See Postgres UNLISTEN { + /// Notification channel identifier. channel: Ident, }, /// ```sql @@ -4257,7 +4777,9 @@ pub enum Statement { /// /// See Postgres NOTIFY { + /// Notification channel identifier. channel: Ident, + /// Optional payload string. payload: Option, }, /// ```sql @@ -4269,11 +4791,17 @@ pub enum Statement { /// /// See Hive LoadData { + /// Whether `LOCAL` is present. local: bool, + /// Input path for files to load. inpath: String, + /// Whether `OVERWRITE` was specified. overwrite: bool, + /// Target table name to load into. table_name: ObjectName, + /// Optional partition specification. partitioned: Option>, + /// Optional table format information. table_format: Option, }, /// ```sql @@ -4296,18 +4824,29 @@ pub enum Statement { /// [ WITH option [ , ...n ] ] /// See RaisError { + /// Error message expression or identifier. message: Box, + /// Severity expression. severity: Box, + /// State expression. state: Box, + /// Substitution arguments for the message. arguments: Vec, + /// Additional `WITH` options for RAISERROR. options: Vec, }, + /// A MSSQL `THROW` statement. + Throw(ThrowStatement), /// ```sql /// PRINT msg_str | @local_variable | string_expr /// ``` /// /// See: Print(PrintStatement), + /// MSSQL `WAITFOR` statement. + /// + /// See: + WaitFor(WaitForStatement), /// ```sql /// RETURN [ expression ] /// ``` @@ -4362,6 +4901,12 @@ impl From for Statement { } } +impl From for Statement { + fn from(lock: Lock) -> Self { + Statement::Lock(lock) + } +} + impl From for Statement { fn from(msck: ddl::Msck) -> Self { Statement::Msck(msck) @@ -4377,7 +4922,9 @@ impl From for Statement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CurrentGrantsKind { + /// `COPY CURRENT GRANTS` (copy current grants to target). 
CopyCurrentGrants, + /// `REVOKE CURRENT GRANTS` (revoke current grants from target). RevokeCurrentGrants, } @@ -4393,9 +4940,14 @@ impl fmt::Display for CurrentGrantsKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `RAISERROR` options +/// See pub enum RaisErrorOption { + /// Log the error. Log, + /// Do not wait for completion. NoWait, + /// Set the error state. SetError, } @@ -4460,9 +5012,9 @@ impl fmt::Display for Statement { f, "{tables}{read}{export}", tables = if !tables.is_empty() { - " ".to_string() + &display_comma_separated(tables).to_string() + format!(" {}", display_comma_separated(tables)) } else { - "".to_string() + String::new() }, export = if *export { " FOR EXPORT" } else { "" }, read = if *read_lock { " WITH READ LOCK" } else { "" } @@ -4704,6 +5256,8 @@ impl fmt::Display for Statement { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, @@ -4763,6 +5317,14 @@ impl fmt::Display for Statement { write!(f, " COMMENT = '{comment}'")?; } + if let Some(charset) = default_charset { + write!(f, " DEFAULT CHARACTER SET {charset}")?; + } + + if let Some(collation) = default_collation { + write!(f, " DEFAULT COLLATE {collation}")?; + } + if let Some(sync) = catalog_sync { write!(f, " CATALOG_SYNC = '{sync}'")?; } @@ -4935,48 +5497,7 @@ impl fmt::Display for Statement { Statement::CreateServer(stmt) => { write!(f, "{stmt}") } - Statement::CreatePolicy { - name, - table_name, - policy_type, - command, - to, - using, - with_check, - } => { - write!(f, "CREATE POLICY {name} ON {table_name}")?; - - if let Some(policy_type) = policy_type { - match policy_type { - CreatePolicyType::Permissive => write!(f, " AS PERMISSIVE")?, - CreatePolicyType::Restrictive => write!(f, " AS 
RESTRICTIVE")?, - } - } - - if let Some(command) = command { - match command { - CreatePolicyCommand::All => write!(f, " FOR ALL")?, - CreatePolicyCommand::Select => write!(f, " FOR SELECT")?, - CreatePolicyCommand::Insert => write!(f, " FOR INSERT")?, - CreatePolicyCommand::Update => write!(f, " FOR UPDATE")?, - CreatePolicyCommand::Delete => write!(f, " FOR DELETE")?, - } - } - - if let Some(to) = to { - write!(f, " TO {}", display_comma_separated(to))?; - } - - if let Some(using) = using { - write!(f, " USING ({using})")?; - } - - if let Some(with_check) = with_check { - write!(f, " WITH CHECK ({with_check})")?; - } - - Ok(()) - } + Statement::CreatePolicy(policy) => write!(f, "{policy}"), Statement::CreateConnector(create_connector) => create_connector.fmt(f), Statement::CreateOperator(create_operator) => create_operator.fmt(f), Statement::CreateOperatorFamily(create_operator_family) => { @@ -5005,16 +5526,17 @@ impl fmt::Display for Statement { Statement::AlterType(AlterType { name, operation }) => { write!(f, "ALTER TYPE {name} {operation}") } + Statement::AlterOperator(alter_operator) => write!(f, "{alter_operator}"), + Statement::AlterOperatorFamily(alter_operator_family) => { + write!(f, "{alter_operator_family}") + } + Statement::AlterOperatorClass(alter_operator_class) => { + write!(f, "{alter_operator_class}") + } Statement::AlterRole { name, operation } => { write!(f, "ALTER ROLE {name} {operation}") } - Statement::AlterPolicy { - name, - table_name, - operation, - } => { - write!(f, "ALTER POLICY {name} ON {table_name}{operation}") - } + Statement::AlterPolicy(alter_policy) => write!(f, "{alter_policy}"), Statement::AlterConnector { name, properties, @@ -5138,22 +5660,7 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::DropPolicy { - if_exists, - name, - table_name, - drop_behavior, - } => { - write!(f, "DROP POLICY")?; - if *if_exists { - write!(f, " IF EXISTS")?; - } - write!(f, " {name} ON {table_name}")?; - if let Some(drop_behavior) = 
drop_behavior { - write!(f, " {drop_behavior}")?; - } - Ok(()) - } + Statement::DropPolicy(policy) => write!(f, "{policy}"), Statement::DropConnector { if_exists, name } => { write!( f, @@ -5421,55 +5928,9 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::Grant { - privileges, - objects, - grantees, - with_grant_option, - as_grantor, - granted_by, - current_grants, - } => { - write!(f, "GRANT {privileges} ")?; - if let Some(objects) = objects { - write!(f, "ON {objects} ")?; - } - write!(f, "TO {}", display_comma_separated(grantees))?; - if *with_grant_option { - write!(f, " WITH GRANT OPTION")?; - } - if let Some(current_grants) = current_grants { - write!(f, " {current_grants}")?; - } - if let Some(grantor) = as_grantor { - write!(f, " AS {grantor}")?; - } - if let Some(grantor) = granted_by { - write!(f, " GRANTED BY {grantor}")?; - } - Ok(()) - } + Statement::Grant(grant) => write!(f, "{grant}"), Statement::Deny(s) => write!(f, "{s}"), - Statement::Revoke { - privileges, - objects, - grantees, - granted_by, - cascade, - } => { - write!(f, "REVOKE {privileges} ")?; - if let Some(objects) = objects { - write!(f, "ON {objects} ")?; - } - write!(f, "FROM {}", display_comma_separated(grantees))?; - if let Some(grantor) = granted_by { - write!(f, " GRANTED BY {grantor}")?; - } - if let Some(cascade) = cascade { - write!(f, " {cascade}")?; - } - Ok(()) - } + Statement::Revoke(revoke) => write!(f, "{revoke}"), Statement::Deallocate { name, prepare } => write!( f, "DEALLOCATE {prepare}{name}", @@ -5548,27 +6009,7 @@ impl fmt::Display for Statement { Statement::ReleaseSavepoint { name } => { write!(f, "RELEASE SAVEPOINT {name}") } - Statement::Merge { - merge_token: _, - into, - table, - source, - on, - clauses, - output, - } => { - write!( - f, - "MERGE{int} {table} USING {source} ", - int = if *into { " INTO" } else { "" } - )?; - write!(f, "ON {on} ")?; - write!(f, "{}", display_separated(clauses, " "))?; - if let Some(output) = output { - write!(f, " 
{output}")?; - } - Ok(()) - } + Statement::Merge(merge) => merge.fmt(f), Statement::Cache { table_name, table_flag, @@ -5760,6 +6201,7 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::Lock(lock) => lock.fmt(f), Statement::LockTables { tables } => { write!(f, "LOCK TABLES {}", display_comma_separated(tables)) } @@ -5795,12 +6237,19 @@ impl fmt::Display for Statement { } Statement::OptimizeTable { name, + has_table_keyword, on_cluster, partition, include_final, deduplicate, + predicate, + zorder, } => { - write!(f, "OPTIMIZE TABLE {name}")?; + write!(f, "OPTIMIZE")?; + if *has_table_keyword { + write!(f, " TABLE")?; + } + write!(f, " {name}")?; if let Some(on_cluster) = on_cluster { write!(f, " ON CLUSTER {on_cluster}")?; } @@ -5813,6 +6262,12 @@ impl fmt::Display for Statement { if let Some(deduplicate) = deduplicate { write!(f, " {deduplicate}")?; } + if let Some(predicate) = predicate { + write!(f, " WHERE {predicate}")?; + } + if let Some(zorder) = zorder { + write!(f, " ZORDER BY ({})", display_comma_separated(zorder))?; + } Ok(()) } Statement::LISTEN { channel } => { @@ -5850,7 +6305,9 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::Throw(s) => write!(f, "{s}"), Statement::Print(s) => write!(f, "{s}"), + Statement::WaitFor(s) => write!(f, "{s}"), Statement::Return(r) => write!(f, "{r}"), Statement::List(command) => write!(f, "LIST {command}"), Statement::Remove(command) => write!(f, "REMOVE {command}"), @@ -5874,11 +6331,17 @@ impl fmt::Display for Statement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SequenceOptions { + /// `INCREMENT [BY] ` option; second value indicates presence of `BY` keyword. IncrementBy(Expr, bool), + /// `MINVALUE ` or `NO MINVALUE`. MinValue(Option), + /// `MAXVALUE ` or `NO MAXVALUE`. MaxValue(Option), + /// `START [WITH] `; second value indicates presence of `WITH`. StartWith(Expr, bool), + /// `CACHE ` option. 
Cache(Expr), + /// `CYCLE` or `NO CYCLE` option. Cycle(bool), } @@ -5928,8 +6391,11 @@ impl fmt::Display for SequenceOptions { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct SetAssignment { + /// Optional context scope (e.g., SESSION or LOCAL). pub scope: Option, + /// Assignment target name. pub name: ObjectName, + /// Assigned expression value. pub value: Expr, } @@ -5955,10 +6421,18 @@ pub struct TruncateTableTarget { /// name of the table being truncated #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, - /// Postgres-specific option - /// [ TRUNCATE TABLE ONLY ] + /// Postgres-specific option: explicitly exclude descendants (also default without ONLY) + /// ```sql + /// TRUNCATE TABLE ONLY name + /// ``` /// pub only: bool, + /// Postgres-specific option: asterisk after table name to explicitly indicate descendants + /// ```sql + /// TRUNCATE TABLE name [ * ] + /// ``` + /// + pub has_asterisk: bool, } impl fmt::Display for TruncateTableTarget { @@ -5966,46 +6440,157 @@ impl fmt::Display for TruncateTableTarget { if self.only { write!(f, "ONLY ")?; }; - write!(f, "{}", self.name) + write!(f, "{}", self.name)?; + if self.has_asterisk { + write!(f, " *")?; + }; + Ok(()) } } -/// PostgreSQL identity option for TRUNCATE table -/// [ RESTART IDENTITY | CONTINUE IDENTITY ] +/// A `LOCK` statement. +/// +/// See #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum TruncateIdentityOption { - Restart, - Continue, -} - -/// Cascade/restrict option for Postgres TRUNCATE table, MySQL GRANT/REVOKE, etc. 
-/// [ CASCADE | RESTRICT ] -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum CascadeOption { - Cascade, - Restrict, +pub struct Lock { + /// List of tables to lock. + pub tables: Vec, + /// Lock mode. + pub lock_mode: Option, + /// Whether `NOWAIT` was specified. + pub nowait: bool, } -impl Display for CascadeOption { +impl fmt::Display for Lock { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - CascadeOption::Cascade => write!(f, "CASCADE"), - CascadeOption::Restrict => write!(f, "RESTRICT"), + write!(f, "LOCK TABLE {}", display_comma_separated(&self.tables))?; + if let Some(lock_mode) = &self.lock_mode { + write!(f, " IN {lock_mode} MODE")?; } + if self.nowait { + write!(f, " NOWAIT")?; + } + Ok(()) } } -/// Transaction started with [ TRANSACTION | WORK ] +/// Target of a `LOCK TABLE` command +/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct LockTableTarget { + /// Name of the table being locked. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub name: ObjectName, + /// Whether `ONLY` was specified to exclude descendant tables. + pub only: bool, + /// Whether `*` was specified to explicitly include descendant tables. + pub has_asterisk: bool, +} + +impl fmt::Display for LockTableTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.only { + write!(f, "ONLY ")?; + } + write!(f, "{}", self.name)?; + if self.has_asterisk { + write!(f, " *")?; + } + Ok(()) + } +} + +/// PostgreSQL lock modes for `LOCK TABLE`. 
+/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum LockTableMode { + /// `ACCESS SHARE` + AccessShare, + /// `ROW SHARE` + RowShare, + /// `ROW EXCLUSIVE` + RowExclusive, + /// `SHARE UPDATE EXCLUSIVE` + ShareUpdateExclusive, + /// `SHARE` + Share, + /// `SHARE ROW EXCLUSIVE` + ShareRowExclusive, + /// `EXCLUSIVE` + Exclusive, + /// `ACCESS EXCLUSIVE` + AccessExclusive, +} + +impl fmt::Display for LockTableMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let text = match self { + Self::AccessShare => "ACCESS SHARE", + Self::RowShare => "ROW SHARE", + Self::RowExclusive => "ROW EXCLUSIVE", + Self::ShareUpdateExclusive => "SHARE UPDATE EXCLUSIVE", + Self::Share => "SHARE", + Self::ShareRowExclusive => "SHARE ROW EXCLUSIVE", + Self::Exclusive => "EXCLUSIVE", + Self::AccessExclusive => "ACCESS EXCLUSIVE", + }; + write!(f, "{text}") + } +} + +/// PostgreSQL identity option for TRUNCATE table +/// [ RESTART IDENTITY | CONTINUE IDENTITY ] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TruncateIdentityOption { + /// Restart identity values (RESTART IDENTITY). + Restart, + /// Continue identity values (CONTINUE IDENTITY). + Continue, +} + +/// Cascade/restrict option for Postgres TRUNCATE table, MySQL GRANT/REVOKE, etc. +/// [ CASCADE | RESTRICT ] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CascadeOption { + /// Apply cascading action (e.g., CASCADE). + Cascade, + /// Restrict the action (e.g., RESTRICT). 
+ Restrict, +} + +impl Display for CascadeOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CascadeOption::Cascade => write!(f, "CASCADE"), + CascadeOption::Restrict => write!(f, "RESTRICT"), + } + } +} + +/// Transaction started with [ TRANSACTION | WORK | TRAN ] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum BeginTransactionKind { + /// Standard `TRANSACTION` keyword. Transaction, + /// Alternate `WORK` keyword. Work, + /// MSSQL shorthand `TRAN` keyword. + /// See + Tran, } impl Display for BeginTransactionKind { @@ -6013,6 +6598,7 @@ impl Display for BeginTransactionKind { match self { BeginTransactionKind::Transaction => write!(f, "TRANSACTION"), BeginTransactionKind::Work => write!(f, "WORK"), + BeginTransactionKind::Tran => write!(f, "TRAN"), } } } @@ -6023,11 +6609,11 @@ impl Display for BeginTransactionKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum MinMaxValue { - // clause is not specified + /// Clause is not specified. Empty, - // NO MINVALUE/NO MAXVALUE + /// NO MINVALUE / NO MAXVALUE. None, - // MINVALUE / MAXVALUE + /// `MINVALUE ` / `MAXVALUE `. Some(Expr), } @@ -6035,6 +6621,7 @@ pub enum MinMaxValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[non_exhaustive] +/// Behavior to apply for `INSERT` when a conflict occurs. 
pub enum OnInsert { /// ON DUPLICATE KEY UPDATE (MySQL when the key already exists, then execute an update instead) DuplicateKeyUpdate(Vec), @@ -6045,40 +6632,64 @@ pub enum OnInsert { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Optional aliases for `INSERT` targets: row alias and optional column aliases. pub struct InsertAliases { + /// Row alias (table-style alias) for the inserted values. pub row_alias: ObjectName, + /// Optional list of column aliases for the inserted values. pub col_aliases: Option>, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Optional alias for an `INSERT` table; i.e. the table to be inserted into +pub struct TableAliasWithoutColumns { + /// `true` if the aliases was explicitly introduced with the "AS" keyword + pub explicit: bool, + /// the alias name itself + pub alias: Ident, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `ON CONFLICT` clause representation. pub struct OnConflict { + /// Optional conflict target specifying columns or constraint. pub conflict_target: Option, + /// Action to take when a conflict occurs. pub action: OnConflictAction, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Target specification for an `ON CONFLICT` clause. pub enum ConflictTarget { + /// Target specified as a list of columns. Columns(Vec), + /// Target specified as a named constraint. 
OnConstraint(ObjectName), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Action to perform when an `ON CONFLICT` target is matched. pub enum OnConflictAction { + /// Do nothing on conflict. DoNothing, + /// Perform an update on conflict. DoUpdate(DoUpdate), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Details for `DO UPDATE` action of an `ON CONFLICT` clause. pub struct DoUpdate { - /// Column assignments + /// Column assignments to perform on update. pub assignments: Vec, - /// WHERE + /// Optional WHERE clause limiting the update. pub selection: Option, } @@ -6175,21 +6786,48 @@ impl fmt::Display for Privileges { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FetchDirection { - Count { limit: Value }, + /// Fetch a specific count of rows. + Count { + /// The limit value for the count. + limit: ValueWithSpan, + }, + /// Fetch the next row. Next, + /// Fetch the prior row. Prior, + /// Fetch the first row. First, + /// Fetch the last row. Last, - Absolute { limit: Value }, - Relative { limit: Value }, + /// Fetch an absolute row by index. + Absolute { + /// The absolute index value. + limit: ValueWithSpan, + }, + /// Fetch a row relative to the current position. + Relative { + /// The relative offset value. + limit: ValueWithSpan, + }, + /// Fetch all rows. All, // FORWARD // FORWARD count - Forward { limit: Option }, + /// Fetch forward by an optional limit. + Forward { + /// Optional forward limit. + limit: Option, + }, + /// Fetch all forward rows. ForwardAll, // BACKWARD // BACKWARD count - Backward { limit: Option }, + /// Fetch backward by an optional limit. 
+ Backward { + /// Optional backward limit. + limit: Option, + }, + /// Fetch all backward rows. BackwardAll, } @@ -6241,7 +6879,9 @@ impl fmt::Display for FetchDirection { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FetchPosition { + /// Use `FROM ` position specifier. From, + /// Use `IN ` position specifier. In, } @@ -6261,71 +6901,125 @@ impl fmt::Display for FetchPosition { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Action { + /// Add a search optimization. AddSearchOptimization, + /// Apply an `APPLY` operation with a specific type. Apply { + /// The type of apply operation. apply_type: ActionApplyType, }, + /// Apply a budget operation. ApplyBudget, + /// Attach a listing. AttachListing, + /// Attach a policy. AttachPolicy, + /// Audit operation. Audit, + /// Bind a service endpoint. BindServiceEndpoint, + /// Connect permission. Connect, + /// Create action, optionally specifying an object type. Create { + /// Optional object type to create. obj_type: Option, }, + /// Actions related to database roles. DatabaseRole { + /// The role name. role: ObjectName, }, + /// Delete permission. Delete, + /// Drop permission. Drop, + /// Evolve schema permission. EvolveSchema, + /// Exec action (execute) with optional object type. Exec { + /// Optional execute object type. obj_type: Option, }, + /// Execute action with optional object type. Execute { + /// Optional execute object type. obj_type: Option, }, + /// Failover operation. Failover, + /// Use imported privileges. ImportedPrivileges, + /// Import a share. ImportShare, + /// Insert rows with optional column list. Insert { + /// Optional list of target columns for insert. columns: Option>, }, + /// Manage operation with a specific manage type. Manage { + /// The specific manage sub-type. manage_type: ActionManageType, }, + /// Manage releases. 
ManageReleases, + /// Manage versions. ManageVersions, + /// Modify operation with an optional modify type. Modify { + /// The optional modify sub-type. modify_type: Option, }, + /// Monitor operation with an optional monitor type. Monitor { + /// The optional monitor sub-type. monitor_type: Option, }, + /// Operate permission. Operate, + /// Override share restrictions. OverrideShareRestrictions, + /// Ownership permission. Ownership, + /// Purchase a data exchange listing. PurchaseDataExchangeListing, + + /// Read access. Read, + /// Read session-level access. ReadSession, + /// References with optional column list. References { + /// Optional list of referenced column identifiers. columns: Option>, }, + /// Replication permission. Replicate, + /// Resolve all references. ResolveAll, + /// Role-related permission with target role name. Role { + /// The target role name. role: ObjectName, }, + /// Select permission with optional column list. Select { + /// Optional list of selected columns. columns: Option>, }, + /// Temporary object permission. Temporary, + /// Trigger-related permission. Trigger, + /// Truncate permission. Truncate, + /// Update permission with optional affected columns. Update { + /// Optional list of columns affected by update. columns: Option>, }, + /// Usage permission. Usage, } @@ -6419,22 +7113,39 @@ impl fmt::Display for Action { /// See /// under `globalPrivileges` in the `CREATE` privilege. pub enum ActionCreateObjectType { + /// An account-level object. Account, + /// An application object. Application, + /// An application package object. ApplicationPackage, + /// A compute pool object. ComputePool, + /// A data exchange listing. DataExchangeListing, + /// A database object. Database, + /// An external volume object. ExternalVolume, + /// A failover group object. FailoverGroup, + /// An integration object. Integration, + /// A network policy object. NetworkPolicy, + /// An organization listing. 
OrganiationListing, + /// A replication group object. ReplicationGroup, + /// A role object. Role, + /// A schema object. Schema, + /// A share object. Share, + /// A user object. User, + /// A warehouse object. Warehouse, } @@ -6468,15 +7179,25 @@ impl fmt::Display for ActionCreateObjectType { /// See /// under `globalPrivileges` in the `APPLY` privilege. pub enum ActionApplyType { + /// Apply an aggregation policy. AggregationPolicy, + /// Apply an authentication policy. AuthenticationPolicy, + /// Apply a join policy. JoinPolicy, + /// Apply a masking policy. MaskingPolicy, + /// Apply a packages policy. PackagesPolicy, + /// Apply a password policy. PasswordPolicy, + /// Apply a projection policy. ProjectionPolicy, + /// Apply a row access policy. RowAccessPolicy, + /// Apply a session policy. SessionPolicy, + /// Apply a tag. Tag, } @@ -6503,10 +7224,15 @@ impl fmt::Display for ActionApplyType { /// See /// under `globalPrivileges` in the `EXECUTE` privilege. pub enum ActionExecuteObjectType { + /// Alert object. Alert, + /// Data metric function object. DataMetricFunction, + /// Managed alert object. ManagedAlert, + /// Managed task object. ManagedTask, + /// Task object. Task, } @@ -6528,12 +7254,19 @@ impl fmt::Display for ActionExecuteObjectType { /// See /// under `globalPrivileges` in the `MANAGE` privilege. pub enum ActionManageType { + /// Account support cases management. AccountSupportCases, + /// Event sharing management. EventSharing, + /// Grants management. Grants, + /// Listing auto-fulfillment management. ListingAutoFulfillment, + /// Organization support cases management. OrganizationSupportCases, + /// User support cases management. UserSupportCases, + /// Warehouses management. Warehouses, } @@ -6557,9 +7290,13 @@ impl fmt::Display for ActionManageType { /// See /// under `globalPrivileges` in the `MODIFY` privilege. pub enum ActionModifyType { + /// Modify log level. LogLevel, + /// Modify trace level. 
TraceLevel, + /// Modify session log level. SessionLogLevel, + /// Modify session trace level. SessionTraceLevel, } @@ -6580,8 +7317,11 @@ impl fmt::Display for ActionModifyType { /// See /// under `globalPrivileges` in the `MONITOR` privilege. pub enum ActionMonitorType { + /// Monitor execution. Execution, + /// Monitor security. Security, + /// Monitor usage. Usage, } @@ -6600,7 +7340,9 @@ impl fmt::Display for ActionMonitorType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Grantee { + /// The category/type of grantee (role, user, share, etc.). pub grantee_type: GranteesType, + /// Optional name of the grantee (identifier or user@host). pub name: Option, } @@ -6643,15 +7385,25 @@ impl fmt::Display for Grantee { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of principal receiving privileges. pub enum GranteesType { + /// A role principal. Role, + /// A share principal. Share, + /// A user principal. User, + /// A group principal. Group, + /// The public principal. Public, + /// A database role principal. DatabaseRole, + /// An application principal. Application, + /// An application role principal. ApplicationRole, + /// No specific principal (e.g. `NONE`). None, } @@ -6663,7 +7415,12 @@ pub enum GranteeName { /// A bare identifier ObjectName(ObjectName), /// A MySQL user/host pair such as 'root'@'%' - UserHost { user: Ident, host: Ident }, + UserHost { + /// The user identifier portion. + user: Ident, + /// The host identifier portion. 
+ host: Ident, + }, } impl fmt::Display for GranteeName { @@ -6683,29 +7440,65 @@ impl fmt::Display for GranteeName { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GrantObjects { /// Grant privileges on `ALL SEQUENCES IN SCHEMA [, ...]` - AllSequencesInSchema { schemas: Vec }, + AllSequencesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL TABLES IN SCHEMA [, ...]` - AllTablesInSchema { schemas: Vec }, + AllTablesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL VIEWS IN SCHEMA [, ...]` - AllViewsInSchema { schemas: Vec }, + AllViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL MATERIALIZED VIEWS IN SCHEMA [, ...]` - AllMaterializedViewsInSchema { schemas: Vec }, + AllMaterializedViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL EXTERNAL TABLES IN SCHEMA [, ...]` - AllExternalTablesInSchema { schemas: Vec }, + AllExternalTablesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL FUNCTIONS IN SCHEMA [, ...]` - AllFunctionsInSchema { schemas: Vec }, + AllFunctionsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE SCHEMAS IN DATABASE [, ...]` - FutureSchemasInDatabase { databases: Vec }, + FutureSchemasInDatabase { + /// The target database names. + databases: Vec, + }, /// Grant privileges on `FUTURE TABLES IN SCHEMA [, ...]` - FutureTablesInSchema { schemas: Vec }, + FutureTablesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE VIEWS IN SCHEMA [, ...]` - FutureViewsInSchema { schemas: Vec }, + FutureViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE EXTERNAL TABLES IN SCHEMA [, ...]` - FutureExternalTablesInSchema { schemas: Vec }, + FutureExternalTablesInSchema { + /// The target schema names. 
+ schemas: Vec, + }, /// Grant privileges on `FUTURE MATERIALIZED VIEWS IN SCHEMA [, ...]` - FutureMaterializedViewsInSchema { schemas: Vec }, + FutureMaterializedViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE SEQUENCES IN SCHEMA [, ...]` - FutureSequencesInSchema { schemas: Vec }, + FutureSequencesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on specific databases Databases(Vec), /// Grant privileges on specific schemas @@ -6740,7 +7533,9 @@ pub enum GrantObjects { /// For example: /// `GRANT USAGE ON PROCEDURE foo(varchar) TO ROLE role1` Procedure { + /// The procedure name. name: ObjectName, + /// Optional argument types for overloaded procedures. arg_types: Vec, }, @@ -6750,7 +7545,9 @@ pub enum GrantObjects { /// For example: /// `GRANT USAGE ON FUNCTION foo(varchar) TO ROLE role1` Function { + /// The function name. name: ObjectName, + /// Optional argument types for overloaded functions. arg_types: Vec, }, } @@ -6909,10 +7706,15 @@ impl fmt::Display for GrantObjects { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DenyStatement { + /// The privileges to deny. pub privileges: Privileges, + /// The objects the privileges apply to. pub objects: GrantObjects, + /// The grantees (users/roles) to whom the denial applies. pub grantees: Vec, + /// Optional identifier of the principal that performed the grant. pub granted_by: Option, + /// Optional cascade option controlling dependent objects. pub cascade: Option, } @@ -6938,7 +7740,9 @@ impl fmt::Display for DenyStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Assignment { + /// The left-hand side of the assignment. pub target: AssignmentTarget, + /// The expression assigned to the target. 
pub value: Expr, } @@ -6973,12 +7777,18 @@ impl fmt::Display for AssignmentTarget { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Expression forms allowed as a function argument. pub enum FunctionArgExpr { + /// A normal expression argument. Expr(Expr), /// Qualified wildcard, e.g. `alias.*` or `schema.table.*`. QualifiedWildcard(ObjectName), - /// An unqualified `*` + /// An unqualified `*` wildcard. Wildcard, + /// An unqualified `*` wildcard with additional options, e.g. `* EXCLUDE(col)`. + /// + /// Used in Snowflake to support expressions like `HASH(* EXCLUDE(col))`. + WildcardWithOptions(WildcardAdditionalOptions), } impl From for FunctionArgExpr { @@ -6997,6 +7807,7 @@ impl fmt::Display for FunctionArgExpr { FunctionArgExpr::Expr(expr) => write!(f, "{expr}"), FunctionArgExpr::QualifiedWildcard(prefix) => write!(f, "{prefix}.*"), FunctionArgExpr::Wildcard => f.write_str("*"), + FunctionArgExpr::WildcardWithOptions(opts) => write!(f, "*{opts}"), } } } @@ -7033,23 +7844,31 @@ impl fmt::Display for FunctionArgOperator { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Forms of function arguments (named, expression-named, or positional). pub enum FunctionArg { /// `name` is identifier /// /// Enabled when `Dialect::supports_named_fn_args_with_expr_name` returns 'false' Named { + /// The identifier name of the argument. name: Ident, + /// The argument expression or wildcard form. arg: FunctionArgExpr, + /// The operator separating name and value. operator: FunctionArgOperator, }, /// `name` is arbitrary expression /// /// Enabled when `Dialect::supports_named_fn_args_with_expr_name` returns 'true' ExprNamed { + /// The expression used as the argument name. 
name: Expr, + /// The argument expression or wildcard form. arg: FunctionArgExpr, + /// The operator separating name and value. operator: FunctionArgOperator, }, + /// An unnamed argument (positional), given by expression or wildcard. Unnamed(FunctionArgExpr), } @@ -7074,9 +7893,15 @@ impl fmt::Display for FunctionArg { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Which cursor(s) to close. pub enum CloseCursor { + /// Close all cursors. All, - Specific { name: Ident }, + /// Close a specific cursor by name. + Specific { + /// The name of the cursor to close. + name: Ident, + }, } impl fmt::Display for CloseCursor { @@ -7108,6 +7933,7 @@ pub struct DropDomain { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TypedString { + /// The data type of the typed string (e.g. DATE, TIME, TIMESTAMP). pub data_type: DataType, /// The value of the constant. /// Hint: you can unwrap the string value using `value.into_string()`. @@ -7152,6 +7978,7 @@ impl fmt::Display for TypedString { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Function { + /// The function name (may be qualified). pub name: ObjectName, /// Flags whether this function call uses the [ODBC syntax]. /// @@ -7293,6 +8120,7 @@ impl fmt::Display for FunctionArgumentList { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Clauses that can appear inside a function argument list. pub enum FunctionArgumentClause { /// Indicates how `NULL`s should be handled in the calculation, e.g. in `FIRST_VALUE` on [BigQuery]. 
/// @@ -7325,7 +8153,7 @@ pub enum FunctionArgumentClause { /// The `SEPARATOR` clause to the [`GROUP_CONCAT`] function in MySQL. /// /// [`GROUP_CONCAT`]: https://dev.mysql.com/doc/refman/8.0/en/aggregate-functions.html#function_group-concat - Separator(Value), + Separator(ValueWithSpan), /// The `ON NULL` clause for some JSON functions. /// /// [MSSQL `JSON_ARRAY`](https://learn.microsoft.com/en-us/sql/t-sql/functions/json-array-transact-sql?view=sql-server-ver16) @@ -7364,8 +8192,10 @@ impl fmt::Display for FunctionArgumentClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Method { + /// The expression on which the method is invoked. pub expr: Box, // always non-empty + /// The sequence of chained method calls. pub method_chain: Vec, } @@ -7383,8 +8213,9 @@ impl fmt::Display for Method { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// How duplicate values are treated inside function argument lists. pub enum DuplicateTreatment { - /// Perform the calculation only unique values. + /// Consider only unique values. Distinct, /// Retain all duplicate values (the default). All, @@ -7402,10 +8233,11 @@ impl fmt::Display for DuplicateTreatment { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// How the `ANALYZE`/`EXPLAIN ANALYZE` format is specified. pub enum AnalyzeFormatKind { - /// e.g. `EXPLAIN ANALYZE FORMAT JSON SELECT * FROM tbl` + /// Format provided as a keyword, e.g. `FORMAT JSON`. Keyword(AnalyzeFormat), - /// e.g. `EXPLAIN ANALYZE FORMAT=JSON SELECT * FROM tbl` + /// Format provided as an assignment, e.g. `FORMAT=JSON`. 
Assignment(AnalyzeFormat), } @@ -7421,11 +8253,17 @@ impl fmt::Display for AnalyzeFormatKind { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Output formats supported for `ANALYZE`/`EXPLAIN ANALYZE`. pub enum AnalyzeFormat { + /// Plain text format. TEXT, + /// Graphviz DOT format. GRAPHVIZ, + /// JSON format. JSON, + /// Traditional explain output. TRADITIONAL, + /// Tree-style explain output. TREE, } @@ -7446,12 +8284,19 @@ impl fmt::Display for AnalyzeFormat { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FileFormat { + /// Text file format. TEXTFILE, + /// Sequence file format. SEQUENCEFILE, + /// ORC file format. ORC, + /// Parquet file format. PARQUET, + /// Avro file format. AVRO, + /// RCFile format. RCFILE, + /// JSON file format. JSONFILE, } @@ -7480,7 +8325,9 @@ pub enum ListAggOnOverflow { /// `ON OVERFLOW TRUNCATE [ ] WITH[OUT] COUNT` Truncate { + /// Optional filler expression used when truncating. filler: Option>, + /// Whether to include a count when truncating. with_count: bool, }, } @@ -7521,8 +8368,11 @@ impl fmt::Display for HavingBound { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Which bound is used in a HAVING clause for ANY_VALUE on BigQuery. pub enum HavingBoundKind { + /// The minimum bound. Min, + /// The maximum bound. Max, } @@ -7538,18 +8388,31 @@ impl fmt::Display for HavingBoundKind { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Types of database objects referenced by DDL statements. 
pub enum ObjectType { + /// A table. Table, + /// A view. View, + /// A materialized view. MaterializedView, + /// An index. Index, + /// A schema. Schema, + /// A database. Database, + /// A role. Role, + /// A sequence. Sequence, + /// A stage. Stage, + /// A type definition. Type, + /// A user. User, + /// A stream. Stream, } @@ -7575,9 +8438,13 @@ impl fmt::Display for ObjectType { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Types supported by `KILL` statements. pub enum KillType { + /// Kill a connection. Connection, + /// Kill a running query. Query, + /// Kill a mutation (ClickHouse). Mutation, } @@ -7596,39 +8463,62 @@ impl fmt::Display for KillType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Distribution style options for Hive tables. pub enum HiveDistributionStyle { + /// Partitioned distribution with the given columns. PARTITIONED { + /// Columns used for partitioning. columns: Vec, }, + /// Skewed distribution definition. SKEWED { + /// Columns participating in the skew definition. columns: Vec, + /// Columns listed in the `ON` clause for skewing. on: Vec, + /// Whether skewed data is stored as directories. stored_as_directories: bool, }, + /// No distribution style specified. NONE, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Row format specification for Hive tables (SERDE or DELIMITED). pub enum HiveRowFormat { - SERDE { class: String }, - DELIMITED { delimiters: Vec }, + /// SerDe class specification with the implementing class name. + SERDE { + /// The SerDe implementation class name. 
+ class: String, + }, + /// Delimited row format with one or more delimiter specifications. + DELIMITED { + /// The list of delimiters used for delimiting fields/lines. + delimiters: Vec, + }, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Format specification for `LOAD DATA` Hive operations. pub struct HiveLoadDataFormat { + /// SerDe expression used for the table. pub serde: Expr, + /// Input format expression. pub input_format: Expr, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single row delimiter specification for Hive `ROW FORMAT`. pub struct HiveRowDelimiter { + /// The delimiter kind (fields/lines/etc.). pub delimiter: HiveDelimiter, + /// The delimiter character identifier. pub char: Ident, } @@ -7642,12 +8532,19 @@ impl fmt::Display for HiveRowDelimiter { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Kind of delimiter used in Hive `ROW FORMAT` definitions. pub enum HiveDelimiter { + /// Fields terminated by a delimiter. FieldsTerminatedBy, + /// Fields escaped by a character. FieldsEscapedBy, + /// Collection items terminated by a delimiter. CollectionItemsTerminatedBy, + /// Map keys terminated by a delimiter. MapKeysTerminatedBy, + /// Lines terminated by a delimiter. LinesTerminatedBy, + /// Null represented by a specific token. 
NullDefinedAs, } @@ -7668,8 +8565,11 @@ impl fmt::Display for HiveDelimiter { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Describe output format options for Hive `DESCRIBE`/`EXPLAIN`. pub enum HiveDescribeFormat { + /// Extended describe output. Extended, + /// Formatted describe output. Formatted, } @@ -7686,9 +8586,13 @@ impl fmt::Display for HiveDescribeFormat { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Aliases accepted for describe-style commands. pub enum DescribeAlias { + /// `DESCRIBE` alias. Describe, + /// `EXPLAIN` alias. Explain, + /// `DESC` alias. Desc, } @@ -7707,12 +8611,18 @@ impl fmt::Display for DescribeAlias { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[allow(clippy::large_enum_variant)] +/// Hive input/output format specification used in `CREATE TABLE`. pub enum HiveIOFormat { + /// Generic IO format with separate input and output expressions. IOF { + /// Expression for the input format. input_format: Expr, + /// Expression for the output format. output_format: Expr, }, + /// File format wrapper referencing a `FileFormat` variant. FileFormat { + /// The file format used for storage. format: FileFormat, }, } @@ -7720,18 +8630,26 @@ pub enum HiveIOFormat { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Hive table format and storage-related options. pub struct HiveFormat { + /// Optional row format specification. pub row_format: Option, + /// Optional SerDe properties expressed as SQL options. 
pub serde_properties: Option>, + /// Optional input/output storage format details. pub storage: Option, + /// Optional location (URI or path) for table data. pub location: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A clustered index column specification. pub struct ClusteredIndex { + /// Column identifier for the clustered index entry. pub name: Ident, + /// Optional sort direction: `Some(true)` for ASC, `Some(false)` for DESC, `None` for unspecified. pub asc: Option, } @@ -7749,9 +8667,13 @@ impl fmt::Display for ClusteredIndex { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Clustered options used for `CREATE TABLE` clustered/indexed storage. pub enum TableOptionsClustered { + /// Use a columnstore index. ColumnstoreIndex, + /// Columnstore index with an explicit ordering of columns. ColumnstoreIndexOrder(Vec), + /// A named clustered index with one or more columns. Index(Vec), } @@ -7776,17 +8698,20 @@ impl fmt::Display for TableOptionsClustered { } /// Specifies which partition the boundary values on table partitioning belongs to. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum PartitionRangeDirection { + /// LEFT range direction. Left, + /// RIGHT range direction. Right, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// SQL option syntax used in table and server definitions. 
pub enum SqlOption { /// Clustered represents the clustered version of table storage for MSSQL. /// @@ -7799,7 +8724,12 @@ pub enum SqlOption { /// Any option that consists of a key value pair where the value is an expression. e.g. /// /// WITH(DISTRIBUTION = ROUND_ROBIN) - KeyValue { key: Ident, value: Expr }, + KeyValue { + /// The option key identifier. + key: Ident, + /// The expression value for the option. + value: Expr, + }, /// One or more table partitions and represents which partition the boundary values belong to, /// e.g. /// @@ -7807,8 +8737,11 @@ pub enum SqlOption { /// /// Partition { + /// The partition column name. column_name: Ident, + /// Optional direction for the partition range (LEFT/RIGHT). range_direction: Option, + /// Values that define the partition boundaries. for_values: Vec, }, /// Comment parameter (supports `=` and no `=` syntax) @@ -7887,8 +8820,11 @@ impl fmt::Display for SqlOption { #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Storage type options for a tablespace. pub enum StorageType { + /// Store on disk. Disk, + /// Store in memory. Memory, } @@ -7898,15 +8834,20 @@ pub enum StorageType { /// MySql TableSpace option /// pub struct TablespaceOption { + /// Name of the tablespace. pub name: String, + /// Optional storage type for the tablespace. pub storage: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A key/value identifier pair used for secret or key-based options. pub struct SecretOption { + /// The option key identifier. pub key: Ident, + /// The option value identifier. 
pub value: Ident, } @@ -7923,11 +8864,17 @@ impl fmt::Display for SecretOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateServerStatement { + /// The server name. pub name: ObjectName, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// Optional server type identifier. pub server_type: Option, + /// Optional server version identifier. pub version: Option, + /// Foreign-data wrapper object name. pub foreign_data_wrapper: ObjectName, + /// Optional list of server options. pub options: Option>, } @@ -7966,11 +8913,14 @@ impl fmt::Display for CreateServerStatement { } } +/// A key/value option for `CREATE SERVER`. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateServerOption { + /// Option key identifier. pub key: Ident, + /// Option value identifier. pub value: Ident, } @@ -7983,8 +8933,11 @@ impl fmt::Display for CreateServerOption { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options supported by DuckDB for `ATTACH DATABASE`. pub enum AttachDuckDBDatabaseOption { + /// READ_ONLY option, optional boolean value. ReadOnly(Option), + /// TYPE option specifying a database type identifier. Type(Ident), } @@ -8002,8 +8955,11 @@ impl fmt::Display for AttachDuckDBDatabaseOption { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Mode for transactions: access mode or isolation level. pub enum TransactionMode { + /// Access mode for a transaction (e.g. `READ ONLY` / `READ WRITE`). 
AccessMode(TransactionAccessMode), + /// Isolation level for a transaction (e.g. `SERIALIZABLE`). IsolationLevel(TransactionIsolationLevel), } @@ -8020,8 +8976,11 @@ impl fmt::Display for TransactionMode { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Transaction access mode (READ ONLY / READ WRITE). pub enum TransactionAccessMode { + /// READ ONLY access mode. ReadOnly, + /// READ WRITE access mode. ReadWrite, } @@ -8038,11 +8997,17 @@ impl fmt::Display for TransactionAccessMode { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Transaction isolation levels. pub enum TransactionIsolationLevel { + /// READ UNCOMMITTED isolation level. ReadUncommitted, + /// READ COMMITTED isolation level. ReadCommitted, + /// REPEATABLE READ isolation level. RepeatableRead, + /// SERIALIZABLE isolation level. Serializable, + /// SNAPSHOT isolation level. Snapshot, } @@ -8067,10 +9032,15 @@ impl fmt::Display for TransactionIsolationLevel { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TransactionModifier { + /// DEFERRED transaction modifier. Deferred, + /// IMMEDIATE transaction modifier. Immediate, + /// EXCLUSIVE transaction modifier. Exclusive, + /// TRY block modifier (MS-SQL style TRY/CATCH). Try, + /// CATCH block modifier (MS-SQL style TRY/CATCH). Catch, } @@ -8090,10 +9060,15 @@ impl fmt::Display for TransactionModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Filter forms usable in SHOW statements. 
pub enum ShowStatementFilter { + /// Filter using LIKE pattern. Like(String), + /// Filter using ILIKE pattern. ILike(String), + /// Filter using a WHERE expression. Where(Expr), + /// Filter provided without a keyword (raw string). NoKeyword(String), } @@ -8112,8 +9087,11 @@ impl fmt::Display for ShowStatementFilter { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Clause types used with SHOW ... IN/FROM. pub enum ShowStatementInClause { + /// Use the `IN` clause. IN, + /// Use the `FROM` clause. FROM, } @@ -8135,10 +9113,15 @@ impl fmt::Display for ShowStatementInClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SqliteOnConflict { + /// Use ROLLBACK on conflict. Rollback, + /// Use ABORT on conflict. Abort, + /// Use FAIL on conflict. Fail, + /// Use IGNORE on conflict. Ignore, + /// Use REPLACE on conflict. Replace, } @@ -8164,8 +9147,11 @@ impl fmt::Display for SqliteOnConflict { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum MysqlInsertPriority { + /// LOW_PRIORITY modifier for INSERT/REPLACE. LowPriority, + /// DELAYED modifier for INSERT/REPLACE. Delayed, + /// HIGH_PRIORITY modifier for INSERT/REPLACE. HighPriority, } @@ -8183,7 +9169,9 @@ impl fmt::Display for crate::ast::MysqlInsertPriority { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Source for the `COPY` command: a table or a query. pub enum CopySource { + /// Copy from a table with optional column list. Table { /// The name of the table to copy from. table_name: ObjectName, @@ -8191,19 +9179,25 @@ pub enum CopySource { /// are copied. 
columns: Vec, }, + /// Copy from the results of a query. Query(Box), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Target for the `COPY` command: STDIN, STDOUT, a file, or a program. pub enum CopyTarget { + /// Use standard input as the source. Stdin, + /// Use standard output as the target. Stdout, + /// Read from or write to a file. File { /// The path name of the input or output file. filename: String, }, + /// Use a program as the source or target (shell command). Program { /// A command to execute command: String, @@ -8229,9 +9223,13 @@ impl fmt::Display for CopyTarget { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Action to take `ON COMMIT` for temporary tables. pub enum OnCommit { + /// Delete rows on commit. DeleteRows, + /// Preserve rows on commit. PreserveRows, + /// Drop the table on commit. Drop, } @@ -8314,7 +9312,12 @@ pub enum CopyLegacyOption { /// CLEANPATH CleanPath, /// COMPUPDATE [ PRESET | { ON | TRUE } | { OFF | FALSE } ] - CompUpdate { preset: bool, enabled: Option }, + CompUpdate { + /// Whether the COMPUPDATE PRESET option was used. + preset: bool, + /// Optional enabled flag for COMPUPDATE. + enabled: Option, + }, /// CSV ... Csv(Vec), /// DATEFORMAT \[ AS \] {'dateformat_string' | 'auto' } @@ -8323,8 +9326,11 @@ pub enum CopyLegacyOption { Delimiter(char), /// EMPTYASNULL EmptyAsNull, - /// ENCRYPTED \[ AUTO \] - Encrypted { auto: bool }, + /// `ENCRYPTED \[ AUTO \]` + Encrypted { + /// Whether `AUTO` was specified for encryption. 
+ auto: bool, + }, /// ESCAPE Escape, /// EXTENSION 'extension-name' @@ -8339,15 +9345,18 @@ pub enum CopyLegacyOption { IamRole(IamRoleKind), /// IGNOREHEADER \[ AS \] number_rows IgnoreHeader(u64), - /// JSON - Json, + /// JSON \[ AS \] 'json_option' + Json(Option), /// MANIFEST \[ VERBOSE \] - Manifest { verbose: bool }, + Manifest { + /// Whether the MANIFEST is verbose. + verbose: bool, + }, /// MAXFILESIZE \[ AS \] max-size \[ MB | GB \] MaxFileSize(FileSize), - /// NULL \[ AS \] 'null_string' + /// `NULL \[ AS \] 'null_string'` Null(String), - /// PARALLEL [ { ON | TRUE } | { OFF | FALSE } ] + /// `PARALLEL [ { ON | TRUE } | { OFF | FALSE } ]` Parallel(Option), /// PARQUET Parquet, @@ -8367,6 +9376,9 @@ pub enum CopyLegacyOption { TruncateColumns, /// ZSTD Zstd, + /// Redshift `CREDENTIALS 'auth-args'` + /// + Credentials(String), } impl fmt::Display for CopyLegacyOption { @@ -8431,7 +9443,13 @@ impl fmt::Display for CopyLegacyOption { Header => write!(f, "HEADER"), IamRole(role) => write!(f, "IAM_ROLE {role}"), IgnoreHeader(num_rows) => write!(f, "IGNOREHEADER {num_rows}"), - Json => write!(f, "JSON"), + Json(opt) => { + write!(f, "JSON")?; + if let Some(opt) = opt { + write!(f, " AS '{}'", value::escape_single_quote_string(opt))?; + } + Ok(()) + } Manifest { verbose } => write!(f, "MANIFEST{}", if *verbose { " VERBOSE" } else { "" }), MaxFileSize(file_size) => write!(f, "MAXFILESIZE {file_size}"), Null(string) => write!(f, "NULL '{}'", value::escape_single_quote_string(string)), @@ -8471,6 +9489,7 @@ impl fmt::Display for CopyLegacyOption { } TruncateColumns => write!(f, "TRUNCATECOLUMNS"), Zstd => write!(f, "ZSTD"), + Credentials(s) => write!(f, "CREDENTIALS '{}'", value::escape_single_quote_string(s)), } } } @@ -8482,7 +9501,9 @@ impl fmt::Display for CopyLegacyOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FileSize { - pub size: Value, + /// Numeric size value. 
+ pub size: ValueWithSpan, + /// Optional unit for the size (MB or GB). pub unit: Option, } @@ -8496,11 +9517,14 @@ impl fmt::Display for FileSize { } } +/// Units for `FileSize` (MB or GB). #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FileSizeUnit { + /// Megabytes. MB, + /// Gigabytes. GB, } @@ -8522,7 +9546,9 @@ impl fmt::Display for FileSizeUnit { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UnloadPartitionBy { + /// Columns used to partition the unload output. pub columns: Vec, + /// Whether to include the partition in the output. pub include: bool, } @@ -8593,264 +9619,18 @@ impl fmt::Display for CopyLegacyCsvOption { } } -/// Variant of `WHEN` clause used within a `MERGE` Statement. -/// -/// Example: -/// ```sql -/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeClauseKind { - /// `WHEN MATCHED` - Matched, - /// `WHEN NOT MATCHED` - NotMatched, - /// `WHEN MATCHED BY TARGET` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedByTarget, - /// `WHEN MATCHED BY SOURCE` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedBySource, -} - -impl Display for MergeClauseKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeClauseKind::Matched => write!(f, "MATCHED"), - 
MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), - MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), - MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), - } - } -} - -/// The type of expression used to insert rows within a `MERGE` statement. -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeInsertKind { - /// The insert expression is defined from an explicit `VALUES` clause - /// - /// Example: - /// ```sql - /// INSERT VALUES(product, quantity) - /// ``` - Values(Values), - /// The insert expression is defined using only the `ROW` keyword. - /// - /// Example: - /// ```sql - /// INSERT ROW - /// ``` - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - Row, -} - -impl Display for MergeInsertKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeInsertKind::Values(values) => { - write!(f, "{values}") - } - MergeInsertKind::Row => { - write!(f, "ROW") - } - } - } -} - -/// The expression used to insert rows within a `MERGE` statement. -/// -/// Examples -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// INSERT ROW -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeInsertExpr { - /// The `INSERT` token that starts the sub-expression. 
- pub insert_token: AttachedToken, - /// Columns (if any) specified by the insert. - /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// INSERT (product, quantity) ROW - /// ``` - pub columns: Vec, - /// The token, `[VALUES | ROW]` starting `kind`. - pub kind_token: AttachedToken, - /// The insert type used by the statement. - pub kind: MergeInsertKind, -} - -impl Display for MergeInsertExpr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if !self.columns.is_empty() { - write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; - } - write!(f, "{}", self.kind) - } -} - -/// Underlying statement of a when clause within a `MERGE` Statement -/// -/// Example -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeAction { - /// An `INSERT` clause - /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// ``` - Insert(MergeInsertExpr), - /// An `UPDATE` clause - /// - /// Example: - /// ```sql - /// UPDATE SET quantity = T.quantity + S.quantity - /// ``` - Update { - /// The `UPDATE` token that starts the sub-expression. - update_token: AttachedToken, - assignments: Vec, - }, - /// A plain `DELETE` clause - Delete { - /// The `DELETE` token that starts the sub-expression. - delete_token: AttachedToken, - }, -} - -impl Display for MergeAction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeAction::Insert(insert) => { - write!(f, "INSERT {insert}") - } - MergeAction::Update { assignments, .. 
} => { - write!(f, "UPDATE SET {}", display_comma_separated(assignments)) - } - MergeAction::Delete { .. } => { - write!(f, "DELETE") - } - } - } -} - -/// A when clause within a `MERGE` Statement -/// -/// Example: -/// ```sql -/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeClause { - /// The `WHEN` token that starts the sub-expression. - pub when_token: AttachedToken, - pub clause_kind: MergeClauseKind, - pub predicate: Option, - pub action: MergeAction, -} - -impl Display for MergeClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let MergeClause { - when_token: _, - clause_kind, - predicate, - action, - } = self; - - write!(f, "WHEN {clause_kind}")?; - if let Some(pred) = predicate { - write!(f, " AND {pred}")?; - } - write!(f, " THEN {action}") - } -} - -/// A Output Clause in the end of a 'MERGE' Statement -/// -/// Example: -/// OUTPUT $action, deleted.* INTO dbo.temp_products; -/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum OutputClause { - Output { - output_token: AttachedToken, - select_items: Vec, - into_table: Option, - }, - Returning { - returning_token: AttachedToken, - select_items: Vec, - }, -} - -impl fmt::Display for OutputClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - OutputClause::Output { - output_token: _, - select_items, - into_table, - } => { - 
f.write_str("OUTPUT ")?; - display_comma_separated(select_items).fmt(f)?; - if let Some(into_table) = into_table { - f.write_str(" ")?; - into_table.fmt(f)?; - } - Ok(()) - } - OutputClause::Returning { - returning_token: _, - select_items, - } => { - f.write_str("RETURNING ")?; - display_comma_separated(select_items).fmt(f) - } - } - } -} - +/// Objects that can be discarded with `DISCARD`. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DiscardObject { + /// Discard all session state. ALL, + /// Discard cached plans. PLANS, + /// Discard sequence values. SEQUENCES, + /// Discard temporary objects. TEMP, } @@ -8865,22 +9645,36 @@ impl fmt::Display for DiscardObject { } } +/// Types of flush operations supported by `FLUSH`. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FlushType { + /// Flush binary logs. BinaryLogs, + /// Flush engine logs. EngineLogs, + /// Flush error logs. ErrorLogs, + /// Flush general logs. GeneralLogs, + /// Flush hosts information. Hosts, + /// Flush logs. Logs, + /// Flush privileges. Privileges, + /// Flush optimizer costs. OptimizerCosts, + /// Flush relay logs. RelayLogs, + /// Flush slow logs. SlowLogs, + /// Flush status. Status, + /// Flush user resources. UserResources, + /// Flush table data. Tables, } @@ -8904,11 +9698,14 @@ impl fmt::Display for FlushType { } } +/// Location modifier for flush commands. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FlushLocation { + /// Do not write changes to the binary log. NoWriteToBinlog, + /// Apply flush locally. 
Local, } @@ -8954,7 +9751,9 @@ impl fmt::Display for ContextModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum DropFunctionOption { + /// `RESTRICT` option for DROP FUNCTION. Restrict, + /// `CASCADE` option for DROP FUNCTION. Cascade, } @@ -8972,7 +9771,9 @@ impl fmt::Display for DropFunctionOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FunctionDesc { + /// The function name. pub name: ObjectName, + /// Optional list of function arguments. pub args: Option>, } @@ -8991,9 +9792,13 @@ impl fmt::Display for FunctionDesc { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct OperateFunctionArg { + /// Optional argument mode (`IN`, `OUT`, `INOUT`). pub mode: Option, + /// Optional argument identifier/name. pub name: Option, + /// The data type of the argument. pub data_type: DataType, + /// Optional default expression for the argument. pub default_expr: Option, } @@ -9040,8 +9845,11 @@ impl fmt::Display for OperateFunctionArg { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ArgMode { + /// `IN` mode. In, + /// `OUT` mode. Out, + /// `INOUT` mode. InOut, } @@ -9060,8 +9868,11 @@ impl fmt::Display for ArgMode { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionBehavior { + /// Function is immutable. Immutable, + /// Function is stable. Stable, + /// Function is volatile. Volatile, } @@ -9075,13 +9886,76 @@ impl fmt::Display for FunctionBehavior { } } +/// Security attribute for functions: SECURITY DEFINER or SECURITY INVOKER. 
+/// +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionSecurity { + /// Execute the function with the privileges of the user who defined it. + Definer, + /// Execute the function with the privileges of the user who invokes it. + Invoker, +} + +impl fmt::Display for FunctionSecurity { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FunctionSecurity::Definer => write!(f, "SECURITY DEFINER"), + FunctionSecurity::Invoker => write!(f, "SECURITY INVOKER"), + } + } +} + +/// Value for a SET configuration parameter in a CREATE FUNCTION statement. +/// +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionSetValue { + /// SET param = value1, value2, ... + Values(Vec), + /// SET param FROM CURRENT + FromCurrent, +} + +/// A SET configuration_parameter clause in a CREATE FUNCTION statement. +/// +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct FunctionDefinitionSetParam { + /// The name of the configuration parameter. + pub name: Ident, + /// The value to set for the parameter. 
+ pub value: FunctionSetValue, +} + +impl fmt::Display for FunctionDefinitionSetParam { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SET {} ", self.name)?; + match &self.value { + FunctionSetValue::Values(values) => { + write!(f, "= {}", display_comma_separated(values)) + } + FunctionSetValue::FromCurrent => write!(f, "FROM CURRENT"), + } + } +} + /// These attributes describe the behavior of the function when called with a null argument. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionCalledOnNull { + /// Function is called even when inputs are null. CalledOnNullInput, + /// Function returns null when any input is null. ReturnsNullOnNullInput, + /// Function is strict about null inputs. Strict, } @@ -9100,8 +9974,11 @@ impl fmt::Display for FunctionCalledOnNull { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionParallel { + /// The function is not safe to run in parallel. Unsafe, + /// The function is restricted for parallel execution. Restricted, + /// The function is safe to run in parallel. Safe, } @@ -9122,7 +9999,9 @@ impl fmt::Display for FunctionParallel { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionDeterminismSpecifier { + /// Function is deterministic. Deterministic, + /// Function is not deterministic. NotDeterministic, } @@ -9242,9 +10121,13 @@ pub enum CreateFunctionBody { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `USING` clause options for `CREATE FUNCTION` (e.g., JAR, FILE, ARCHIVE). 
pub enum CreateFunctionUsing { + /// Use a JAR file located at the given URI. Jar(String), + /// Use a file located at the given URI. File(String), + /// Use an archive located at the given URI. Archive(String), } @@ -9267,7 +10150,9 @@ impl fmt::Display for CreateFunctionUsing { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct MacroArg { + /// The argument name. pub name: Ident, + /// Optional default expression for the argument. pub default_expr: Option, } @@ -9294,8 +10179,11 @@ impl fmt::Display for MacroArg { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Definition for a DuckDB macro: either an expression or a table-producing query. pub enum MacroDefinition { + /// The macro is defined as an expression. Expr(Expr), + /// The macro is defined as a table (query). Table(Box), } @@ -9378,12 +10266,16 @@ impl fmt::Display for SearchModifier { } } +/// Represents a `LOCK TABLE` clause with optional alias and lock type. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct LockTable { + /// The table identifier to lock. pub table: Ident, + /// Optional alias for the table. pub alias: Option, + /// The type of lock to apply to the table. pub lock_type: LockTableType, } @@ -9407,9 +10299,18 @@ impl fmt::Display for LockTable { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The type of lock used in `LOCK TABLE` statements. pub enum LockTableType { - Read { local: bool }, - Write { low_priority: bool }, + /// Shared/read lock. If `local` is true, it's a local read lock. 
+ Read { + /// Whether the read lock is local. + local: bool, + }, + /// Exclusive/write lock. If `low_priority` is true, the write is low priority. + Write { + /// Whether the write lock is low priority. + low_priority: bool, + }, } impl fmt::Display for LockTableType { @@ -9436,8 +10337,11 @@ impl fmt::Display for LockTableType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Hive-specific `SET LOCATION` helper used in some `LOAD DATA` statements. pub struct HiveSetLocation { + /// Whether the `SET` keyword was present. pub has_set: bool, + /// The location identifier. pub location: Ident, } @@ -9455,8 +10359,11 @@ impl fmt::Display for HiveSetLocation { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL `ALTER TABLE` column position specifier: `FIRST` or `AFTER `. pub enum MySQLColumnPosition { + /// Place the column first in the table. First, + /// Place the column after the specified identifier. After(Ident), } @@ -9476,9 +10383,13 @@ impl Display for MySQLColumnPosition { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL `CREATE VIEW` algorithm options. pub enum CreateViewAlgorithm { + /// `UNDEFINED` algorithm. Undefined, + /// `MERGE` algorithm. Merge, + /// `TEMPTABLE` algorithm. TempTable, } @@ -9495,8 +10406,11 @@ impl Display for CreateViewAlgorithm { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL `CREATE VIEW` SQL SECURITY options. 
pub enum CreateViewSecurity { + /// The view runs with the privileges of the definer. Definer, + /// The view runs with the privileges of the invoker. Invoker, } @@ -9516,8 +10430,11 @@ impl Display for CreateViewSecurity { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateViewParams { + /// Optional view algorithm (e.g., MERGE, TEMPTABLE). pub algorithm: Option, + /// Optional definer (the security principal that will own the view). pub definer: Option, + /// Optional SQL SECURITY setting for the view. pub security: Option, } @@ -9552,8 +10469,11 @@ impl Display for CreateViewParams { /// ENGINE = SummingMergeTree([columns]) /// ``` pub struct NamedParenthesizedList { + /// The option key (identifier) for this named list. pub key: Ident, + /// Optional secondary name associated with the key. pub name: Option, + /// The list of identifier values for the key. pub values: Vec, } @@ -9565,11 +10485,14 @@ pub struct NamedParenthesizedList { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct RowAccessPolicy { + /// The fully-qualified policy object name. pub policy: ObjectName, + /// Identifiers for the columns or objects the policy applies to. pub on: Vec, } impl RowAccessPolicy { + /// Create a new `RowAccessPolicy` for the given `policy` and `on` identifiers. pub fn new(policy: ObjectName, on: Vec) -> Self { Self { policy, on } } @@ -9586,6 +10509,30 @@ impl Display for RowAccessPolicy { } } +/// Snowflake `[ WITH ] STORAGE LIFECYCLE POLICY ON ( [ , ... ] )` +/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct StorageLifecyclePolicy { + /// The fully-qualified policy object name. + pub policy: ObjectName, + /// Column names the policy applies to. 
+ pub on: Vec, +} + +impl Display for StorageLifecyclePolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "WITH STORAGE LIFECYCLE POLICY {} ON ({})", + self.policy, + display_comma_separated(self.on.as_slice()) + ) + } +} + /// Snowflake `WITH TAG ( tag_name = '', ...)` /// /// @@ -9593,11 +10540,14 @@ impl Display for RowAccessPolicy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Tag { + /// The tag key (can be qualified). pub key: ObjectName, + /// The tag value as a string. pub value: String, } impl Tag { + /// Create a new `Tag` with the given key and value. pub fn new(key: ObjectName, value: String) -> Self { Self { key, value } } @@ -9616,7 +10566,9 @@ impl Display for Tag { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ContactEntry { + /// The purpose label for the contact entry. pub purpose: String, + /// The contact information associated with the purpose. pub contact: String, } @@ -9634,6 +10586,7 @@ pub enum CommentDef { /// Includes `=` when printing the comment, as `COMMENT = 'comment'` /// Does not include `=` when printing the comment, as `COMMENT 'comment'` WithEq(String), + /// Comment variant that omits the `=` when displayed. WithoutEq(String), } @@ -9712,7 +10665,9 @@ where #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UtilityOption { + /// The option name (identifier). pub name: Ident, + /// Optional argument for the option (number, string, keyword, etc.). pub arg: Option, } @@ -9733,10 +10688,15 @@ impl Display for UtilityOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ShowStatementOptions { + /// Optional scope to show in (for example: TABLE, SCHEMA). 
pub show_in: Option, - pub starts_with: Option, + /// Optional `STARTS WITH` filter value. + pub starts_with: Option, + /// Optional `LIMIT` expression. pub limit: Option, - pub limit_from: Option, + /// Optional `FROM` value used with `LIMIT`. + pub limit_from: Option, + /// Optional filter position (infix or suffix) for `LIKE`/`FILTER`. pub filter_position: Option, } @@ -9778,19 +10738,28 @@ impl Display for ShowStatementOptions { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Where a `SHOW` filter appears relative to the main clause. pub enum ShowStatementFilterPosition { + /// Put the filter in an infix position (e.g. `SHOW COLUMNS LIKE '%name%' IN TABLE tbl`). Infix(ShowStatementFilter), // For example: SHOW COLUMNS LIKE '%name%' IN TABLE tbl + /// Put the filter in a suffix position (e.g. `SHOW COLUMNS IN tbl LIKE '%name%'`). Suffix(ShowStatementFilter), // For example: SHOW COLUMNS IN tbl LIKE '%name%' } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Parent object types usable with `SHOW ... IN ` clauses. pub enum ShowStatementInParentType { + /// ACCOUNT parent type for SHOW statements. Account, + /// DATABASE parent type for SHOW statements. Database, + /// SCHEMA parent type for SHOW statements. Schema, + /// TABLE parent type for SHOW statements. Table, + /// VIEW parent type for SHOW statements. View, } @@ -9809,9 +10778,13 @@ impl fmt::Display for ShowStatementInParentType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents a `SHOW ... IN` clause with optional parent qualifier and name. 
pub struct ShowStatementIn { + /// The clause that specifies what to show (e.g. COLUMNS, TABLES). pub clause: ShowStatementInClause, + /// Optional parent type qualifier (ACCOUNT/DATABASE/...). pub parent_type: Option, + /// Optional parent object name for the SHOW clause. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub parent_name: Option, } @@ -9837,6 +10810,7 @@ pub struct ShowCharset { /// The statement can be written as `SHOW CHARSET` or `SHOW CHARACTER SET` /// true means CHARSET was used and false means CHARACTER SET was used pub is_shorthand: bool, + /// Optional `LIKE`/`WHERE`-style filter for the statement. pub filter: Option, } @@ -9848,8 +10822,8 @@ impl fmt::Display for ShowCharset { } else { write!(f, " CHARACTER SET")?; } - if self.filter.is_some() { - write!(f, " {}", self.filter.as_ref().unwrap())?; + if let Some(filter) = &self.filter { + write!(f, " {filter}")?; } Ok(()) } @@ -9858,8 +10832,11 @@ impl fmt::Display for ShowCharset { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options for a `SHOW OBJECTS` statement. pub struct ShowObjects { + /// Whether to show terse output. pub terse: bool, + /// Additional options controlling the SHOW output. pub show_options: ShowStatementOptions, } @@ -9876,7 +10853,9 @@ pub struct ShowObjects { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum JsonNullClause { + /// `NULL ON NULL` behavior for JSON functions. NullOnNull, + /// `ABSENT ON NULL` behavior for JSON functions. AbsentOnNull, } @@ -9899,6 +10878,7 @@ impl Display for JsonNullClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct JsonReturningClause { + /// The data type to return from the JSON function (e.g. JSON/JSONB). 
pub data_type: DataType, } @@ -9913,7 +10893,9 @@ impl Display for JsonReturningClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct RenameTable { + /// The current name of the object to rename. pub old_name: ObjectName, + /// The new name for the object. pub new_name: ObjectName, } @@ -9943,6 +10925,16 @@ pub enum TableObject { /// ``` /// [Clickhouse](https://clickhouse.com/docs/en/sql-reference/table-functions) TableFunction(Function), + + /// Table specified through a sub-query + /// Example: + /// ```sql + /// INSERT INTO + /// (SELECT employee_id, last_name, email, hire_date, job_id, salary, commission_pct FROM employees) + /// VALUES (207, 'Gregory', 'pgregory@example.com', sysdate, 'PU_CLERK', 1.2E3, NULL); + /// ``` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/INSERT.html#GUID-903F8043-0254-4EE9-ACC1-CB8AC0AF3423__I2126242) + TableQuery(Box), } impl fmt::Display for TableObject { @@ -9950,6 +10942,7 @@ impl fmt::Display for TableObject { match self { Self::TableName(table_name) => write!(f, "{table_name}"), Self::TableFunction(func) => write!(f, "FUNCTION {func}"), + Self::TableQuery(table_query) => write!(f, "({table_query})"), } } } @@ -9959,7 +10952,9 @@ impl fmt::Display for TableObject { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct SetSessionAuthorizationParam { + /// The scope for the `SET SESSION AUTHORIZATION` (e.g., GLOBAL/SESSION). pub scope: ContextModifier, + /// The specific authorization parameter kind. 
pub kind: SetSessionAuthorizationParamKind, } @@ -9993,10 +10988,15 @@ impl fmt::Display for SetSessionAuthorizationParamKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Kind of session parameter being set by `SET SESSION`. pub enum SetSessionParamKind { + /// Generic session parameter (name/value pair). Generic(SetSessionParamGeneric), + /// Identity insert related parameter. IdentityInsert(SetSessionParamIdentityInsert), + /// Offsets-related parameter. Offsets(SetSessionParamOffsets), + /// Statistics-related parameter. Statistics(SetSessionParamStatistics), } @@ -10014,8 +11014,11 @@ impl fmt::Display for SetSessionParamKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Generic `SET SESSION` parameter represented as name(s) and value. pub struct SetSessionParamGeneric { + /// Names of the session parameters being set. pub names: Vec, + /// The value to assign to the parameter(s). pub value: String, } @@ -10028,8 +11031,11 @@ impl fmt::Display for SetSessionParamGeneric { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `IDENTITY_INSERT` session parameter for a specific object. pub struct SetSessionParamIdentityInsert { + /// Object name targeted by `IDENTITY_INSERT`. pub obj: ObjectName, + /// Value (ON/OFF) for the identity insert setting. 
pub value: SessionParamValue, } @@ -10042,8 +11048,11 @@ impl fmt::Display for SetSessionParamIdentityInsert { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Offsets-related session parameter with keywords and a value. pub struct SetSessionParamOffsets { + /// Keywords specifying which offsets to modify. pub keywords: Vec, + /// Value (ON/OFF) for the offsets setting. pub value: SessionParamValue, } @@ -10061,8 +11070,11 @@ impl fmt::Display for SetSessionParamOffsets { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Statistics-related session parameter specifying topic and value. pub struct SetSessionParamStatistics { + /// Statistics topic to set (IO/PROFILE/TIME/XML). pub topic: SessionParamStatsTopic, + /// Value (ON/OFF) for the statistics topic. pub value: SessionParamValue, } @@ -10075,10 +11087,15 @@ impl fmt::Display for SetSessionParamStatistics { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Topics available for session statistics configuration. pub enum SessionParamStatsTopic { + /// Input/output statistics. IO, + /// Profile statistics. Profile, + /// Time statistics. Time, + /// XML-related statistics. Xml, } @@ -10096,8 +11113,11 @@ impl fmt::Display for SessionParamStatsTopic { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Value for a session boolean-like parameter (ON/OFF). pub enum SessionParamValue { + /// Session parameter enabled. On, + /// Session parameter disabled. 
Off, } @@ -10120,7 +11140,9 @@ impl fmt::Display for SessionParamValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum StorageSerializationPolicy { + /// Use compatible serialization mode. Compatible, + /// Use optimized serialization mode. Optimized, } @@ -10143,7 +11165,9 @@ impl Display for StorageSerializationPolicy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CatalogSyncNamespaceMode { + /// Nest namespaces when syncing catalog. Nest, + /// Flatten namespaces when syncing catalog. Flatten, } @@ -10172,7 +11196,9 @@ pub enum CopyIntoSnowflakeKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `PRINT` statement for producing debug/output messages. pub struct PrintStatement { + /// The expression producing the message to print. pub message: Box, } @@ -10182,6 +11208,47 @@ impl fmt::Display for PrintStatement { } } +/// The type of `WAITFOR` statement (MSSQL). +/// +/// See: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum WaitForType { + /// `WAITFOR DELAY 'time_to_pass'` + Delay, + /// `WAITFOR TIME 'time_to_execute'` + Time, +} + +impl fmt::Display for WaitForType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + WaitForType::Delay => write!(f, "DELAY"), + WaitForType::Time => write!(f, "TIME"), + } + } +} + +/// MSSQL `WAITFOR` statement. 
+/// +/// See: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct WaitForStatement { + /// `DELAY` or `TIME`. + pub wait_type: WaitForType, + /// The time expression. + pub expr: Expr, +} + +impl fmt::Display for WaitForStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "WAITFOR {} {}", self.wait_type, self.expr) + } +} + /// Represents a `Return` statement. /// /// [MsSql triggers](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-trigger-transact-sql) @@ -10190,6 +11257,7 @@ impl fmt::Display for PrintStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ReturnStatement { + /// Optional return value expression. pub value: Option, } @@ -10207,6 +11275,7 @@ impl fmt::Display for ReturnStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ReturnStatementValue { + /// Return an expression from a function or trigger. Expr(Expr), } @@ -10232,7 +11301,9 @@ impl fmt::Display for OpenStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum NullInclusion { + /// Include NULL values in the UNPIVOT output. IncludeNulls, + /// Exclude NULL values from the UNPIVOT output. ExcludeNulls, } @@ -10256,7 +11327,9 @@ impl fmt::Display for NullInclusion { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct MemberOf { + /// The value to check for membership. pub value: Box, + /// The JSON array expression to check against. 
pub array: Box, } @@ -10269,9 +11342,13 @@ impl fmt::Display for MemberOf { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents an `EXPORT DATA` statement. pub struct ExportData { + /// Options for the export operation. pub options: Vec, + /// The query producing the data to export. pub query: Box, + /// Optional named connection to use for export. pub connection: Option, } @@ -10306,11 +11383,17 @@ impl fmt::Display for ExportData { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateUser { + /// Replace existing user if present. pub or_replace: bool, + /// Only create the user if it does not already exist. pub if_not_exists: bool, + /// The name of the user to create. pub name: Ident, + /// Key/value options for user creation. pub options: KeyValueOptions, + /// Whether tags are specified using `WITH TAG`. pub with_tags: bool, + /// Tags for the user. pub tags: KeyValueOptions, } @@ -10340,35 +11423,58 @@ impl fmt::Display for CreateUser { /// Modifies the properties of a user /// -/// Syntax: +/// [Snowflake Syntax:](https://docs.snowflake.com/en/sql-reference/sql/alter-user) /// ```sql /// ALTER USER [ IF EXISTS ] [ ] [ OPTIONS ] /// ``` /// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/alter-user) +/// [PostgreSQL Syntax:](https://www.postgresql.org/docs/current/sql-alteruser.html) +/// ```sql +/// ALTER USER [ WITH ] option [ ... ] +/// ``` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUser { + /// Whether to only alter the user if it exists. pub if_exists: bool, + /// The name of the user to alter. 
pub name: Ident, - /// The following fields are Snowflake-specific: + /// Optional new name for the user (Snowflake-specific). + /// See: pub rename_to: Option, + /// Reset the user's password. pub reset_password: bool, + /// Abort all running queries for the user. pub abort_all_queries: bool, + /// Optionally add a delegated role authorization. pub add_role_delegation: Option, + /// Optionally remove a delegated role authorization. pub remove_role_delegation: Option, + /// Enroll the user in MFA. pub enroll_mfa: bool, + /// Set the default MFA method for the user. pub set_default_mfa_method: Option, + /// Remove the user's default MFA method. pub remove_mfa_method: Option, + /// Modify an MFA method for the user. pub modify_mfa_method: Option, + /// Add an MFA OTP method with optional count. pub add_mfa_method_otp: Option, + /// Set a user policy. pub set_policy: Option, + /// Unset a user policy. pub unset_policy: Option, + /// Key/value tag options to set on the user. pub set_tag: KeyValueOptions, + /// Tags to unset on the user. pub unset_tag: Vec, + /// Key/value properties to set on the user. pub set_props: KeyValueOptions, + /// Properties to unset on the user. pub unset_props: Vec, + /// The following options are PostgreSQL-specific: + pub password: Option, } /// ```sql @@ -10378,7 +11484,9 @@ pub struct AlterUser { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserAddRoleDelegation { + /// Role name to delegate. pub role: Ident, + /// Security integration receiving the delegation. pub integration: Ident, } @@ -10389,7 +11497,9 @@ pub struct AlterUserAddRoleDelegation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserRemoveRoleDelegation { + /// Optional role name to remove delegation for. pub role: Option, + /// Security integration from which to remove delegation. 
pub integration: Ident, } @@ -10400,7 +11510,8 @@ pub struct AlterUserRemoveRoleDelegation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserAddMfaMethodOtp { - pub count: Option, + /// Optional OTP count parameter. + pub count: Option, } /// ```sql @@ -10410,7 +11521,9 @@ pub struct AlterUserAddMfaMethodOtp { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserModifyMfaMethod { + /// The MFA method being modified. pub method: MfaMethodKind, + /// The new comment for the MFA method. pub comment: String, } @@ -10419,8 +11532,11 @@ pub struct AlterUserModifyMfaMethod { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum MfaMethodKind { + /// PassKey (hardware or platform passkey) MFA method. PassKey, + /// Time-based One-Time Password (TOTP) MFA method. Totp, + /// Duo Security MFA method. Duo, } @@ -10441,7 +11557,9 @@ impl fmt::Display for MfaMethodKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserSetPolicy { + /// The kind of user policy being set (authentication/password/session). pub policy_kind: UserPolicyKind, + /// The identifier of the policy to apply. pub policy: Ident, } @@ -10450,8 +11568,11 @@ pub struct AlterUserSetPolicy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum UserPolicyKind { + /// Authentication policy. Authentication, + /// Password policy. Password, + /// Session policy. 
Session, } @@ -10545,6 +11666,36 @@ impl fmt::Display for AlterUser { if !self.unset_props.is_empty() { write!(f, " UNSET {}", display_comma_separated(&self.unset_props))?; } + if let Some(password) = &self.password { + write!(f, " {}", password)?; + } + Ok(()) + } +} + +/// ```sql +/// ALTER USER role_specification [ WITH ] PASSWORD { 'password' | NULL } +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterUserPassword { + /// Whether the password is encrypted. + pub encrypted: bool, + /// The password string, or `None` for `NULL`. + pub password: Option, +} + +impl Display for AlterUserPassword { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.encrypted { + write!(f, "ENCRYPTED ")?; + } + write!(f, "PASSWORD")?; + match &self.password { + None => write!(f, " NULL")?, + Some(password) => write!(f, " '{}'", value::escape_single_quote_string(password))?, + } Ok(()) } } @@ -10573,8 +11724,11 @@ pub enum CreateTableLikeKind { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Controls whether defaults are included when creating a table FROM/LIKE another. pub enum CreateTableLikeDefaults { + /// Include default values from the source table. Including, + /// Exclude default values from the source table. Excluding, } @@ -10590,8 +11744,11 @@ impl fmt::Display for CreateTableLikeDefaults { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents the `LIKE` clause of a `CREATE TABLE` statement. pub struct CreateTableLike { + /// The source table name to copy the schema from.
pub name: ObjectName, + /// Optional behavior controlling whether defaults are copied. pub defaults: Option, } @@ -10612,8 +11769,11 @@ impl fmt::Display for CreateTableLike { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum RefreshModeKind { + /// Automatic refresh mode (`AUTO`). Auto, + /// Full refresh mode (`FULL`). Full, + /// Incremental refresh mode (`INCREMENTAL`). Incremental, } @@ -10634,7 +11794,9 @@ impl fmt::Display for RefreshModeKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum InitializeKind { + /// Initialize on creation (`ON CREATE`). OnCreate, + /// Initialize on schedule (`ON SCHEDULE`). OnSchedule, } @@ -10657,13 +11819,21 @@ impl fmt::Display for InitializeKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct VacuumStatement { + /// Whether `FULL` was specified. pub full: bool, + /// Whether `SORT ONLY` was specified. pub sort_only: bool, + /// Whether `DELETE ONLY` was specified. pub delete_only: bool, + /// Whether `REINDEX` was specified. pub reindex: bool, + /// Whether `RECLUSTER` was specified. pub recluster: bool, + /// Optional table to run `VACUUM` on. pub table_name: Option, - pub threshold: Option, + /// Optional threshold value (percent) for `TO threshold PERCENT`. + pub threshold: Option, + /// Whether `BOOST` was specified. pub boost: bool, } @@ -10711,9 +11881,71 @@ pub enum Reset { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ResetStatement { + /// The reset action to perform (either `ALL` or a specific configuration parameter). 
pub reset: Reset, } +/// Query optimizer hints are optionally supported comments after the +/// `SELECT`, `INSERT`, `UPDATE`, `REPLACE`, `MERGE`, and `DELETE` keywords in +/// the corresponding statements. +/// +/// See [Select::optimizer_hints] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OptimizerHint { + /// An optional prefix between the comment marker and `+`. + /// + /// Standard optimizer hints like `/*+ ... */` have an empty prefix, + /// while system-specific hints like `/*abc+ ... */` have `prefix = "abc"`. + /// The prefix is any sequence of ASCII alphanumeric characters + /// immediately before the `+` marker. + pub prefix: String, + /// the raw text of the optimizer hint without its markers + pub text: String, + /// the style of the comment which `text` was extracted from, + /// e.g. `/*+...*/` or `--+...` + /// + /// Not all dialects support all styles, though. + pub style: OptimizerHintStyle, +} + +/// The commentary style of an [optimizer hint](OptimizerHint) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OptimizerHintStyle { + /// A hint corresponding to a single line comment, + /// e.g. `--+ LEADING(v.e v.d t)` + SingleLine { + /// the comment prefix, e.g. `--` + prefix: String, + }, + /// A hint corresponding to a multi line comment, + /// e.g. 
`/*+ LEADING(v.e v.d t) */` + MultiLine, +} + +impl fmt::Display for OptimizerHint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.style { + OptimizerHintStyle::SingleLine { prefix } => { + f.write_str(prefix)?; + f.write_str(&self.prefix)?; + f.write_str("+")?; + f.write_str(&self.text) + } + OptimizerHintStyle::MultiLine => { + f.write_str("/*")?; + f.write_str(&self.prefix)?; + f.write_str("+")?; + f.write_str(&self.text)?; + f.write_str("*/") + } + } + } +} + impl fmt::Display for ResetStatement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match &self.reset { @@ -10813,6 +12045,12 @@ impl From for Statement { } } +impl From for Statement { + fn from(t: ThrowStatement) -> Self { + Self::Throw(t) + } +} + impl From for Statement { fn from(f: Function) -> Self { Self::Call(f) @@ -10855,6 +12093,24 @@ impl From for Statement { } } +impl From for Statement { + fn from(c: CreateOperator) -> Self { + Self::CreateOperator(c) + } +} + +impl From for Statement { + fn from(c: CreateOperatorFamily) -> Self { + Self::CreateOperatorFamily(c) + } +} + +impl From for Statement { + fn from(c: CreateOperatorClass) -> Self { + Self::CreateOperatorClass(c) + } +} + impl From for Statement { fn from(a: AlterSchema) -> Self { Self::AlterSchema(a) @@ -10867,6 +12123,36 @@ impl From for Statement { } } +impl From for Statement { + fn from(a: AlterOperator) -> Self { + Self::AlterOperator(a) + } +} + +impl From for Statement { + fn from(a: AlterOperatorFamily) -> Self { + Self::AlterOperatorFamily(a) + } +} + +impl From for Statement { + fn from(a: AlterOperatorClass) -> Self { + Self::AlterOperatorClass(a) + } +} + +impl From for Statement { + fn from(m: Merge) -> Self { + Self::Merge(m) + } +} + +impl From for Statement { + fn from(a: AlterUser) -> Self { + Self::AlterUser(a) + } +} + impl From for Statement { fn from(d: DropDomain) -> Self { Self::DropDomain(d) @@ -10909,6 +12195,24 @@ impl From for Statement { } } +impl From for Statement { 
+ fn from(d: DropOperator) -> Self { + Self::DropOperator(d) + } +} + +impl From for Statement { + fn from(d: DropOperatorFamily) -> Self { + Self::DropOperatorFamily(d) + } +} + +impl From for Statement { + fn from(d: DropOperatorClass) -> Self { + Self::DropOperatorClass(d) + } +} + impl From for Statement { fn from(d: DenyStatement) -> Self { Self::Deny(d) diff --git a/src/ast/query.rs b/src/ast/query.rs index f6146e6299..9fea9e3d1c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -120,8 +120,11 @@ impl fmt::Display for Query { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ProjectionSelect { + /// The list of projected select items. pub projection: Vec, + /// Optional `ORDER BY` clause for the projection-select. pub order_by: Option, + /// Optional `GROUP BY` clause for the projection-select. pub group_by: Option, } @@ -151,17 +154,28 @@ pub enum SetExpr { /// in its body and an optional ORDER BY / LIMIT. Query(Box), /// UNION/EXCEPT/INTERSECT of two queries + /// A set operation combining two query expressions. SetOperation { + /// Left operand of the set operation. + left: Box, + /// The set operator used (e.g. `UNION`, `EXCEPT`). op: SetOperator, + /// Optional quantifier (`ALL`, `DISTINCT`, etc.). set_quantifier: SetQuantifier, - left: Box, + /// Right operand of the set operation. right: Box, }, + /// `VALUES (...)` Values(Values), + /// `INSERT` statement Insert(Statement), + /// `UPDATE` statement Update(Statement), + /// `DELETE` statement Delete(Statement), + /// `MERGE` statement Merge(Statement), + /// `TABLE` command Table(Box
), } @@ -222,10 +236,15 @@ impl fmt::Display for SetExpr { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A set operator for combining two `SetExpr`s. pub enum SetOperator { + /// `UNION` set operator Union, + /// `EXCEPT` set operator Except, + /// `INTERSECT` set operator Intersect, + /// `MINUS` set operator (non-standard) Minus, } @@ -247,11 +266,17 @@ impl fmt::Display for SetOperator { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SetQuantifier { + /// `ALL` quantifier All, + /// `DISTINCT` quantifier Distinct, + /// `BY NAME` quantifier ByName, + /// `ALL BY NAME` quantifier AllByName, + /// `DISTINCT BY NAME` quantifier DistinctByName, + /// No quantifier specified None, } @@ -272,8 +297,11 @@ impl fmt::Display for SetQuantifier { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] /// A [`TABLE` command]( https://www.postgresql.org/docs/current/sql-select.html#SQL-TABLE) #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A (possibly schema-qualified) table reference used in `FROM` clauses. pub struct Table { + /// Optional table name (absent for e.g. `TABLE` command without argument). pub table_name: Option, + /// Optional schema/catalog name qualifying the table. pub schema_name: Option, } @@ -294,7 +322,7 @@ impl fmt::Display for Table { } /// What did this select look like? -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SelectFlavor { @@ -306,17 +334,129 @@ pub enum SelectFlavor { FromFirstNoSelect, } +/// MySQL-specific SELECT modifiers that appear after the SELECT keyword. 
+/// +/// These modifiers affect query execution and optimization. They can appear in any order after +/// SELECT and before the column list, can be repeated, and can be interleaved with +/// DISTINCT/DISTINCTROW/ALL: +/// +/// ```sql +/// SELECT +/// [ALL | DISTINCT | DISTINCTROW] +/// [HIGH_PRIORITY] +/// [STRAIGHT_JOIN] +/// [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT] +/// [SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS] +/// select_expr [, select_expr] ... +/// ``` +/// +/// See [MySQL SELECT](https://dev.mysql.com/doc/refman/8.4/en/select.html). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct SelectModifiers { + /// `HIGH_PRIORITY` gives the SELECT higher priority than statements that update a table. + /// + /// + pub high_priority: bool, + /// `STRAIGHT_JOIN` forces the optimizer to join tables in the order listed in the FROM clause. + /// + /// + pub straight_join: bool, + /// `SQL_SMALL_RESULT` hints that the result set is small, using in-memory temp tables. + /// + /// + pub sql_small_result: bool, + /// `SQL_BIG_RESULT` hints that the result set is large, using disk-based temp tables. + /// + /// + pub sql_big_result: bool, + /// `SQL_BUFFER_RESULT` forces the result to be put into a temporary table to release locks early. + /// + /// + pub sql_buffer_result: bool, + /// `SQL_NO_CACHE` tells MySQL not to cache the query result. (Deprecated in 8.4+.) + /// + /// + pub sql_no_cache: bool, + /// `SQL_CALC_FOUND_ROWS` tells MySQL to calculate the total number of rows. (Deprecated in 8.0.17+.) 
+ /// + /// - [MySQL SELECT modifiers](https://dev.mysql.com/doc/refman/8.4/en/select.html) + /// - [`FOUND_ROWS()`](https://dev.mysql.com/doc/refman/8.4/en/information-functions.html#function_found-rows) + pub sql_calc_found_rows: bool, +} + +impl fmt::Display for SelectModifiers { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.high_priority { + f.write_str(" HIGH_PRIORITY")?; + } + if self.straight_join { + f.write_str(" STRAIGHT_JOIN")?; + } + if self.sql_small_result { + f.write_str(" SQL_SMALL_RESULT")?; + } + if self.sql_big_result { + f.write_str(" SQL_BIG_RESULT")?; + } + if self.sql_buffer_result { + f.write_str(" SQL_BUFFER_RESULT")?; + } + if self.sql_no_cache { + f.write_str(" SQL_NO_CACHE")?; + } + if self.sql_calc_found_rows { + f.write_str(" SQL_CALC_FOUND_ROWS")?; + } + Ok(()) + } +} + +impl SelectModifiers { + /// Returns true if any of the modifiers are set. + pub fn is_any_set(&self) -> bool { + // Using irrefutable destructuring to catch fields added in the future + let Self { + high_priority, + straight_join, + sql_small_result, + sql_big_result, + sql_buffer_result, + sql_no_cache, + sql_calc_found_rows, + } = self; + *high_priority + || *straight_join + || *sql_small_result + || *sql_big_result + || *sql_buffer_result + || *sql_no_cache + || *sql_calc_found_rows + } +} + /// A restricted variant of `SELECT` (without CTEs/`ORDER BY`), which may /// appear either as the only body item of a `Query`, or as an operand /// to a set operation like `UNION`. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "visitor", visit(with = "visit_select"))] pub struct Select { /// Token for the `SELECT` keyword pub select_token: AttachedToken, + /// Query optimizer hints + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hints: Vec, /// `SELECT [DISTINCT] ...` pub distinct: Option, + /// MySQL-specific SELECT modifiers. + /// + /// See [MySQL SELECT](https://dev.mysql.com/doc/refman/8.4/en/select.html). + pub select_modifiers: Option, /// MSSQL syntax: `TOP () [ PERCENT ] [ WITH TIES ]` pub top: Option, /// Whether the top was located before `ALL`/`DISTINCT` @@ -341,6 +481,8 @@ pub struct Select { pub prewhere: Option, /// WHERE pub selection: Option, + /// [START WITH ..] CONNECT BY .. + pub connect_by: Vec, /// GROUP BY pub group_by: GroupByExpr, /// CLUSTER BY (Hive) @@ -362,8 +504,6 @@ pub struct Select { pub window_before_qualify: bool, /// BigQuery syntax: `SELECT AS VALUE | SELECT AS STRUCT` pub value_table_mode: Option, - /// STARTING WITH .. CONNECT BY - pub connect_by: Option, /// Was this a FROM-first query? 
pub flavor: SelectFlavor, } @@ -382,6 +522,11 @@ impl fmt::Display for Select { } } + for hint in &self.optimizer_hints { + f.write_str(" ")?; + hint.fmt(f)?; + } + if let Some(value_table_mode) = self.value_table_mode { f.write_str(" ")?; value_table_mode.fmt(f)?; @@ -404,6 +549,10 @@ impl fmt::Display for Select { } } + if let Some(ref select_modifiers) = self.select_modifiers { + select_modifiers.fmt(f)?; + } + if !self.projection.is_empty() { indented_list(f, &self.projection)?; } @@ -437,6 +586,10 @@ impl fmt::Display for Select { SpaceOrNewline.fmt(f)?; Indent(selection).fmt(f)?; } + for clause in &self.connect_by { + SpaceOrNewline.fmt(f)?; + clause.fmt(f)?; + } match &self.group_by { GroupByExpr::All(_) => { SpaceOrNewline.fmt(f)?; @@ -500,10 +653,6 @@ impl fmt::Display for Select { display_comma_separated(&self.named_window).fmt(f)?; } } - if let Some(ref connect_by) = self.connect_by { - SpaceOrNewline.fmt(f)?; - connect_by.fmt(f)?; - } Ok(()) } } @@ -588,6 +737,7 @@ impl fmt::Display for NamedWindowExpr { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A named window definition: ` AS ` pub struct NamedWindowDefinition(pub Ident, pub NamedWindowExpr); impl fmt::Display for NamedWindowDefinition { @@ -599,10 +749,13 @@ impl fmt::Display for NamedWindowDefinition { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A `WITH` clause, introducing common table expressions (CTEs). pub struct With { - /// Token for the "WITH" keyword + /// Token for the `WITH` keyword pub with_token: AttachedToken, + /// Whether the `WITH` is recursive (`WITH RECURSIVE`). pub recursive: bool, + /// The list of CTEs declared by this `WITH` clause. 
pub cte_tables: Vec, } @@ -617,9 +770,10 @@ impl fmt::Display for With { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Indicates whether a CTE is materialized or not. pub enum CteAsMaterialized { /// The `WITH` statement specifies `AS MATERIALIZED` behavior Materialized, @@ -649,11 +803,15 @@ impl fmt::Display for CteAsMaterialized { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Cte { + /// The CTE alias (name introduced before the `AS` keyword). pub alias: TableAlias, + /// The query that defines the CTE body. pub query: Box, + /// Optional `FROM` identifier for materialized CTEs. pub from: Option, + /// Optional `AS MATERIALIZED` / `AS NOT MATERIALIZED` hint. pub materialized: Option, - /// Token for the closing parenthesis + /// Token for the closing parenthesis of the CTE definition. pub closing_paren_token: AttachedToken, } @@ -708,7 +866,12 @@ pub enum SelectItem { /// Any expression, not followed by `[ AS ] alias` UnnamedExpr(Expr), /// An expression, followed by `[ AS ] alias` - ExprWithAlias { expr: Expr, alias: Ident }, + ExprWithAlias { + /// The expression being projected. + expr: Expr, + /// The alias for the expression. + alias: Ident, + }, /// An expression, followed by a wildcard expansion. /// e.g. `alias.*`, `STRUCT('foo').*` QualifiedWildcard(SelectItemQualifiedWildcardKind, WildcardAdditionalOptions), @@ -737,7 +900,9 @@ impl fmt::Display for SelectItemQualifiedWildcardKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IdentWithAlias { + /// The identifier being aliased. pub ident: Ident, + /// The alias to apply to `ident`. 
pub alias: Ident, } @@ -769,6 +934,9 @@ pub struct WildcardAdditionalOptions { pub opt_replace: Option, /// `[RENAME ...]`. pub opt_rename: Option, + /// `[AS ]`. + /// Redshift syntax: + pub opt_alias: Option, } impl Default for WildcardAdditionalOptions { @@ -780,6 +948,7 @@ impl Default for WildcardAdditionalOptions { opt_except: None, opt_replace: None, opt_rename: None, + opt_alias: None, } } } @@ -801,6 +970,9 @@ impl fmt::Display for WildcardAdditionalOptions { if let Some(rename) = &self.opt_rename { write!(f, " {rename}")?; } + if let Some(alias) = &self.opt_alias { + write!(f, " AS {alias}")?; + } Ok(()) } } @@ -815,6 +987,7 @@ impl fmt::Display for WildcardAdditionalOptions { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IlikeSelectItem { + /// The pattern expression used with `ILIKE`. pub pattern: String, } @@ -845,13 +1018,13 @@ pub enum ExcludeSelectItem { /// ```plaintext /// /// ``` - Single(Ident), + Single(ObjectName), /// Multiple column names inside parenthesis. /// # Syntax /// ```plaintext /// (, , ...) /// ``` - Multiple(Vec), + Multiple(Vec), } impl fmt::Display for ExcludeSelectItem { @@ -954,6 +1127,7 @@ impl fmt::Display for ExceptSelectItem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ReplaceSelectItem { + /// List of replacement elements contained in the `REPLACE(...)` clause. pub items: Vec>, } @@ -973,8 +1147,11 @@ impl fmt::Display for ReplaceSelectItem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ReplaceSelectElement { + /// Expression producing the replacement value. pub expr: Expr, + /// The target column name for the replacement. pub column_name: Ident, + /// Whether the `AS` keyword was present in the original syntax. 
pub as_keyword: bool, } @@ -1013,8 +1190,11 @@ impl fmt::Display for SelectItem { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A left table followed by zero or more joins. pub struct TableWithJoins { + /// The starting table factor (left side) of the join chain. pub relation: TableFactor, + /// The sequence of joins applied to the relation. pub joins: Vec, } @@ -1032,32 +1212,71 @@ impl fmt::Display for TableWithJoins { /// Joins a table to itself to process hierarchical data in the table. /// /// See . +/// See #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct ConnectBy { - /// START WITH - pub condition: Expr, +pub enum ConnectByKind { /// CONNECT BY - pub relationships: Vec, + ConnectBy { + /// the `CONNECT` token + connect_token: AttachedToken, + + /// [CONNECT BY] NOCYCLE + /// + /// Optional on [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Hierarchical-Queries.html#GUID-0118DF1D-B9A9-41EB-8556-C6E7D6A5A84E__GUID-5377971A-F518-47E4-8781-F06FEB3EF993) + nocycle: bool, + + /// join conditions denoting the hierarchical relationship + relationships: Vec, + }, + + /// START WITH + /// + /// Optional on [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Hierarchical-Queries.html#GUID-0118DF1D-B9A9-41EB-8556-C6E7D6A5A84E) + /// when coming _after_ the `CONNECT BY`.
+ StartWith { + /// the `START` token + start_token: AttachedToken, + + /// condition selecting the root rows of the hierarchy + condition: Box, + }, } -impl fmt::Display for ConnectBy { +impl fmt::Display for ConnectByKind { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "START WITH {condition} CONNECT BY {relationships}", - condition = self.condition, - relationships = display_comma_separated(&self.relationships) - ) + match self { + ConnectByKind::ConnectBy { + connect_token: _, + nocycle, + relationships, + } => { + write!( + f, + "CONNECT BY {nocycle}{relationships}", + nocycle = if *nocycle { "NOCYCLE " } else { "" }, + relationships = display_comma_separated(relationships) + ) + } + ConnectByKind::StartWith { + start_token: _, + condition, + } => { + write!(f, "START WITH {condition}") + } + } } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single setting key-value pair. pub struct Setting { + /// Setting name/key. pub key: Ident, + /// The value expression assigned to the setting. pub value: Expr, } @@ -1077,7 +1296,9 @@ impl fmt::Display for Setting { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ExprWithAlias { + /// The expression. pub expr: Expr, + /// Optional alias for the expression. pub alias: Option, } @@ -1102,7 +1323,9 @@ impl fmt::Display for ExprWithAlias { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ExprWithAliasAndOrderBy { + /// Expression with optional alias. pub expr: ExprWithAlias, + /// Ordering options applied to the expression. 
pub order_by: OrderByOptions, } @@ -1117,20 +1340,25 @@ impl fmt::Display for ExprWithAliasAndOrderBy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableFunctionArgs { + /// The list of arguments passed to the table-valued function. pub args: Vec, - /// ClickHouse-specific SETTINGS clause. + /// ClickHouse-specific `SETTINGS` clause. /// For example, /// `SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', SETTINGS send_chunk_header = false, pool_size = 16)` /// [`executable` table function](https://clickhouse.com/docs/en/engines/table-functions/executable) pub settings: Option>, } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Type of index hint (e.g., `USE`, `IGNORE`, `FORCE`). pub enum TableIndexHintType { + /// `USE` hint. Use, + /// `IGNORE` hint. Ignore, + /// `FORCE` hint. Force, } @@ -1144,11 +1372,14 @@ impl fmt::Display for TableIndexHintType { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of index referenced by an index hint (e.g. `USE INDEX`). pub enum TableIndexType { + /// The `INDEX` kind. Index, + /// The `KEY` kind. Key, } @@ -1161,12 +1392,16 @@ impl fmt::Display for TableIndexType { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Which clause the table index hint applies to. 
pub enum TableIndexHintForClause { + /// Apply the hint to JOIN clauses. Join, + /// Apply the hint to `ORDER BY` clauses. OrderBy, + /// Apply the hint to `GROUP BY` clauses. GroupBy, } @@ -1183,10 +1418,15 @@ impl fmt::Display for TableIndexHintForClause { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL-style index hints attached to a table (e.g., `USE INDEX(...)`). pub struct TableIndexHints { + /// Type of hint (e.g., `USE`, `FORCE`, or `IGNORE`). pub hint_type: TableIndexHintType, + /// The index type (e.g., `INDEX`). pub index_type: TableIndexType, + /// Optional `FOR` clause specifying the scope (JOIN / ORDER BY / GROUP BY). pub for_clause: Option, + /// List of index names referred to by the hint. pub index_names: Vec, } @@ -1206,9 +1446,12 @@ impl fmt::Display for TableIndexHints { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] pub enum TableFactor { + /// A named table or relation, possibly with arguments, hints, or sampling. Table { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + /// Table or relation name. name: ObjectName, + /// Optional alias for the table (e.g. `table AS t`). alias: Option, /// Arguments of a table-valued function, as supported by Postgres /// and MSSQL. Note that deprecated MSSQL `FROM foo (NOLOCK)` syntax @@ -1238,27 +1481,41 @@ pub enum TableFactor { /// See: index_hints: Vec, }, + /// A derived table (a parenthesized subquery), optionally `LATERAL`. Derived { + /// Whether the derived table is LATERAL. lateral: bool, + /// The subquery producing the derived table. subquery: Box, + /// Optional alias for the derived table. alias: Option, + /// Optional table sample modifier + sample: Option, }, /// A pass-through query string that is not parsed. 
/// This is useful while building/rewriting queries with a known valid SQL string and to avoid parsing it. PassThroughQuery { + /// The raw SQL query string to pass through without parsing. query: String, + /// Optional alias for the pass-through query. alias: Option, }, /// `TABLE()[ AS ]` TableFunction { + /// Expression representing the table function call. expr: Expr, + /// Optional alias for the table function result. alias: Option, }, /// `e.g. LATERAL FLATTEN()[ AS ]` Function { + /// Whether the function is LATERAL. lateral: bool, + /// Name of the table function. name: ObjectName, + /// Arguments passed to the function. args: Vec, + /// Optional alias for the result of the function. alias: Option, }, /// ```sql @@ -1272,10 +1529,15 @@ pub enum TableFactor { /// +---------+--------+ /// ``` UNNEST { + /// Optional alias for the UNNEST table (e.g. `UNNEST(...) AS t`). alias: Option, + /// Expressions producing the arrays to be unnested. array_exprs: Vec, + /// Whether `WITH OFFSET` was specified to include element offsets. with_offset: bool, + /// Optional alias for the offset column when `WITH OFFSET` is used. with_offset_alias: Option, + /// Whether `WITH ORDINALITY` was specified to include ordinality. with_ordinality: bool, }, /// The `JSON_TABLE` table-valued function. @@ -1298,7 +1560,7 @@ pub enum TableFactor { json_expr: Expr, /// The path to the array or object to be iterated over. /// It must evaluate to a json array or object. - json_path: Value, + json_path: ValueWithSpan, /// The columns to be extracted from each element of the array or object. /// Each column must have a name and a type. columns: Vec, @@ -1319,7 +1581,7 @@ pub enum TableFactor { json_expr: Expr, /// The path to the array or object to be iterated over. /// It must evaluate to a json array or object. - json_path: Option, + json_path: Option, /// The columns to be extracted from each element of the array or object. /// Each column must have a name and a type. 
columns: Vec, @@ -1333,7 +1595,9 @@ pub enum TableFactor { /// The parser may also accept non-standard nesting of bare tables for some /// dialects, but the information about such nesting is stripped from AST. NestedJoin { + /// The nested join expression contained in parentheses. table_with_joins: Box, + /// Optional alias for the nested join. alias: Option, }, /// Represents PIVOT operation on a table. @@ -1341,12 +1605,19 @@ pub enum TableFactor { /// /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#pivot_operator) /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/pivot) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/SELECT.html#GUID-CFA006CA-6FF1-4972-821E-6996142A51C6__GUID-68257B27-1C4C-4C47-8140-5C60E0E65D35) Pivot { + /// The input table to pivot. table: Box, + /// Aggregate expressions used as pivot values (optionally aliased). aggregate_functions: Vec, // Function expression + /// Columns producing the values to be pivoted. value_column: Vec, + /// Source of pivot values (e.g. list of literals or columns). value_source: PivotValueSource, + /// Optional expression providing a default when a pivot produces NULL. default_on_null: Option, + /// Optional alias for the pivoted table. alias: Option, }, /// An UNPIVOT operation on a table. @@ -1356,20 +1627,29 @@ pub enum TableFactor { /// table UNPIVOT [ { INCLUDE | EXCLUDE } NULLS ] (value FOR name IN (column1, [ column2, ... ])) [ alias ] /// ``` /// - /// See . - /// See . 
+ /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/unpivot) + /// [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot) + /// [BigQuery](https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#unpivot_operator) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/SELECT.html#GUID-CFA006CA-6FF1-4972-821E-6996142A51C6__GUID-9B4E0389-413C-4014-94A1-0A0571BDF7E1) Unpivot { + /// The input table to unpivot. table: Box, + /// Expression producing the unpivoted value. value: Expr, + /// Identifier used for the generated column name. name: Ident, + /// Columns or expressions to unpivot, optionally aliased. columns: Vec, + /// Whether to include or exclude NULLs during unpivot. null_inclusion: Option, + /// Optional alias for the resulting table. alias: Option, }, /// A `MATCH_RECOGNIZE` operation on a table. /// /// See . MatchRecognize { + /// The input table to apply `MATCH_RECOGNIZE` on. table: Box, /// `PARTITION BY [, ... ]` partition_by: Vec, @@ -1385,6 +1665,7 @@ pub enum TableFactor { pattern: MatchRecognizePattern, /// `DEFINE AS [, ... ]` symbols: Vec, + /// The alias for the table. alias: Option, }, /// The `XMLTABLE` table-valued function. @@ -1459,20 +1740,30 @@ pub enum TableSampleKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents a `TABLESAMPLE` clause and its options. pub struct TableSample { + /// Modifier (e.g. `SAMPLE` or `TABLESAMPLE`). pub modifier: TableSampleModifier, + /// Optional sampling method name (e.g. `BERNOULLI`, `SYSTEM`). pub name: Option, + /// Optional sampling quantity (value and optional unit). pub quantity: Option, + /// Optional seed clause. pub seed: Option, + /// Optional bucket specification for `BUCKET ... OUT OF ...`-style sampling. 
pub bucket: Option, + /// Optional offset expression for sampling. pub offset: Option, } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modifier specifying whether `SAMPLE` or `TABLESAMPLE` keyword was used. pub enum TableSampleModifier { + /// `SAMPLE` modifier. Sample, + /// `TABLESAMPLE` modifier. TableSample, } @@ -1489,9 +1780,13 @@ impl fmt::Display for TableSampleModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Quantity for a `TABLESAMPLE` clause (e.g. `10 PERCENT` or `(10)`). pub struct TableSampleQuantity { + /// Whether the quantity was wrapped in parentheses. pub parenthesized: bool, + /// The numeric expression specifying the quantity. pub value: Expr, + /// Optional unit (e.g. `PERCENT`, `ROWS`). pub unit: Option, } @@ -1512,13 +1807,18 @@ impl fmt::Display for TableSampleQuantity { } /// The table sample method names -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Sampling method used by `TABLESAMPLE`. pub enum TableSampleMethod { + /// `ROW` sampling method. Row, + /// `BERNOULLI` sampling method. Bernoulli, + /// `SYSTEM` sampling method. System, + /// `BLOCK` sampling method. Block, } @@ -1536,9 +1836,12 @@ impl fmt::Display for TableSampleMethod { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `SEED` or `REPEATABLE` clause used with sampling. 
pub struct TableSampleSeed { + /// Seed modifier (e.g. `REPEATABLE` or `SEED`). pub modifier: TableSampleSeedModifier, - pub value: Value, + /// The seed value expression. + pub value: ValueWithSpan, } impl fmt::Display for TableSampleSeed { @@ -1548,11 +1851,14 @@ impl fmt::Display for TableSampleSeed { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modifier specifying how the sample seed is applied. pub enum TableSampleSeedModifier { + /// `REPEATABLE` modifier. Repeatable, + /// `SEED` modifier. Seed, } @@ -1565,11 +1871,14 @@ impl fmt::Display for TableSampleSeedModifier { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Unit used with a `TABLESAMPLE` quantity (rows or percent). pub enum TableSampleUnit { + /// `ROWS` unit. Rows, + /// `PERCENT` unit. Percent, } @@ -1585,9 +1894,13 @@ impl fmt::Display for TableSampleUnit { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Bucket-based sampling clause: `BUCKET OUT OF [ON ]`. pub struct TableSampleBucket { - pub bucket: Value, - pub total: Value, + /// The bucket index expression. + pub bucket: ValueWithSpan, + /// The total number of buckets expression. + pub total: ValueWithSpan, + /// Optional `ON ` specification. 
pub on: Option, } @@ -1663,8 +1976,11 @@ impl fmt::Display for PivotValueSource { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An item in the `MEASURES` clause of `MATCH_RECOGNIZE`. pub struct Measure { + /// Expression producing the measure value. pub expr: Expr, + /// Alias for the measure column. pub alias: Ident, } @@ -1734,6 +2050,7 @@ impl fmt::Display for AfterMatchSkip { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The mode for handling empty matches in a `MATCH_RECOGNIZE` operation. pub enum EmptyMatchesMode { /// `SHOW EMPTY MATCHES` Show, @@ -1759,8 +2076,11 @@ impl fmt::Display for EmptyMatchesMode { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A symbol defined in a `MATCH_RECOGNIZE` operation. pub struct SymbolDefinition { + /// The symbol identifier. pub symbol: Ident, + /// The expression defining the symbol. 
pub definition: Expr, } @@ -1928,6 +2248,7 @@ impl fmt::Display for TableFactor { lateral, subquery, alias, + sample, } => { if *lateral { write!(f, "LATERAL ")?; @@ -1940,6 +2261,9 @@ impl fmt::Display for TableFactor { if let Some(alias) = alias { write!(f, " {alias}")?; } + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { + write!(f, " {sample}")?; + } Ok(()) } TableFactor::PassThroughQuery { query, alias } => { @@ -2193,12 +2517,15 @@ impl fmt::Display for TableFactor { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An alias for a table reference, optionally including an explicit `AS` and column names. pub struct TableAlias { /// Tells whether the alias was introduced with an explicit, preceding "AS" /// keyword, e.g. `AS name`. Typically, the keyword is preceding the name /// (e.g. `.. FROM table AS t ..`). pub explicit: bool, + /// Alias identifier for the table. pub name: Ident, + /// Optional column aliases declared in parentheses after the table alias. pub columns: Vec, } @@ -2250,20 +2577,54 @@ impl fmt::Display for TableAliasColumnDef { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Specifies a table version selection, e.g. `FOR SYSTEM_TIME AS OF` or `AT(...)`. pub enum TableVersion { /// When the table version is defined using `FOR SYSTEM_TIME AS OF`. /// For example: `SELECT * FROM tbl FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)` ForSystemTimeAsOf(Expr), + /// When the table version is defined using `TIMESTAMP AS OF`. + /// Databricks supports this syntax. 
+ /// For example: `SELECT * FROM tbl TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL 1 HOUR` + TimestampAsOf(Expr), + /// When the table version is defined using `VERSION AS OF`. + /// Databricks supports this syntax. + /// For example: `SELECT * FROM tbl VERSION AS OF 2` + VersionAsOf(Expr), /// When the table version is defined using a function. /// For example: `SELECT * FROM tbl AT(TIMESTAMP => '2020-08-14 09:30:00')` Function(Expr), + /// Snowflake `CHANGES` clause for change tracking queries. + /// For example: + /// ```sql + /// SELECT * FROM t + /// CHANGES(INFORMATION => DEFAULT) + /// AT(TIMESTAMP => TO_TIMESTAMP_TZ('...')) + /// END(TIMESTAMP => TO_TIMESTAMP_TZ('...')) + /// ``` + /// + Changes { + /// The `CHANGES(INFORMATION => ...)` function-call expression. + changes: Expr, + /// The `AT(TIMESTAMP => ...)` function-call expression. + at: Expr, + /// The optional `END(TIMESTAMP => ...)` function-call expression. + end: Option, + }, } impl Display for TableVersion { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { TableVersion::ForSystemTimeAsOf(e) => write!(f, "FOR SYSTEM_TIME AS OF {e}")?, + TableVersion::TimestampAsOf(e) => write!(f, "TIMESTAMP AS OF {e}")?, + TableVersion::VersionAsOf(e) => write!(f, "VERSION AS OF {e}")?, TableVersion::Function(func) => write!(f, "{func}")?, + TableVersion::Changes { changes, at, end } => { + write!(f, "{changes} {at}")?; + if let Some(end) = end { + write!(f, " {end}")?; + } + } } Ok(()) } @@ -2272,11 +2633,14 @@ impl Display for TableVersion { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single `JOIN` clause including relation and join operator/options. pub struct Join { + /// The joined table factor (table reference or derived table). pub relation: TableFactor, /// ClickHouse supports the optional `GLOBAL` keyword before the join operator. 
/// See [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/join) pub global: bool, + /// The join operator and its constraint (INNER/LEFT/RIGHT/CROSS/ASOF/etc.). pub join_operator: JoinOperator, } @@ -2413,41 +2777,50 @@ impl fmt::Display for Join { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The operator used for joining two tables, e.g. `INNER`, `LEFT`, `CROSS`, `ASOF`, etc. pub enum JoinOperator { + /// Generic `JOIN` with an optional constraint. Join(JoinConstraint), + /// `INNER JOIN` with an optional constraint. Inner(JoinConstraint), + /// `LEFT JOIN` with an optional constraint. Left(JoinConstraint), + /// `LEFT OUTER JOIN` with an optional constraint. LeftOuter(JoinConstraint), + /// `RIGHT JOIN` with an optional constraint. Right(JoinConstraint), + /// `RIGHT OUTER JOIN` with an optional constraint. RightOuter(JoinConstraint), + /// `FULL OUTER JOIN` with an optional constraint. FullOuter(JoinConstraint), - /// CROSS (constraint is non-standard) + /// `CROSS JOIN` (constraint usage is non-standard). 
CrossJoin(JoinConstraint), - /// SEMI (non-standard) + /// `SEMI JOIN` (non-standard) Semi(JoinConstraint), - /// LEFT SEMI (non-standard) + /// `LEFT SEMI JOIN` (non-standard) LeftSemi(JoinConstraint), - /// RIGHT SEMI (non-standard) + /// `RIGHT SEMI JOIN` (non-standard) RightSemi(JoinConstraint), - /// ANTI (non-standard) + /// `ANTI JOIN` (non-standard) Anti(JoinConstraint), - /// LEFT ANTI (non-standard) + /// `LEFT ANTI JOIN` (non-standard) LeftAnti(JoinConstraint), - /// RIGHT ANTI (non-standard) + /// `RIGHT ANTI JOIN` (non-standard) RightAnti(JoinConstraint), - /// CROSS APPLY (non-standard) + /// `CROSS APPLY` (non-standard) CrossApply, - /// OUTER APPLY (non-standard) + /// `OUTER APPLY` (non-standard) OuterApply, - /// `ASOF` joins are used for joining tables containing time-series data - /// whose timestamp columns do not match exactly. + /// `ASOF` joins are used for joining time-series tables whose timestamp columns do not match exactly. /// /// See . AsOf { + /// Condition used to match records in the `ASOF` join. match_condition: Expr, + /// Additional constraint applied to the `ASOF` join. constraint: JoinConstraint, }, - /// STRAIGHT_JOIN (non-standard) + /// `STRAIGHT_JOIN` (MySQL non-standard behavior) /// /// See . StraightJoin(JoinConstraint), @@ -2456,35 +2829,42 @@ pub enum JoinOperator { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents how two tables are constrained in a join: `ON`, `USING`, `NATURAL`, or none. pub enum JoinConstraint { + /// `ON ` join condition. On(Expr), + /// `USING(...)` list of column names. Using(Vec), + /// `NATURAL` join (columns matched automatically). Natural, + /// No constraint specified (e.g. `CROSS JOIN`). 
None, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of `ORDER BY` clause: either `ALL` with modifiers or a list of expressions. pub enum OrderByKind { - /// ALL syntax of [DuckDB] and [ClickHouse]. + /// `ORDER BY ALL` syntax of [DuckDB] and [ClickHouse], with optional modifiers. /// /// [DuckDB]: /// [ClickHouse]: All(OrderByOptions), - /// Expressions + /// A standard list of ordering expressions. Expressions(Vec), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents an `ORDER BY` clause with its kind and optional `INTERPOLATE`. pub struct OrderBy { + /// The kind of ordering (expressions or `ALL`). pub kind: OrderByKind, - /// Optional: `INTERPOLATE` - /// Supported by [ClickHouse syntax] + /// Optional `INTERPOLATE` clause (ClickHouse extension). pub interpolate: Option, } @@ -2516,10 +2896,11 @@ impl fmt::Display for OrderBy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct OrderByExpr { + /// The expression to order by. pub expr: Expr, + /// Ordering options such as `ASC`/`DESC` and `NULLS` behavior. pub options: OrderByOptions, - /// Optional: `WITH FILL` - /// Supported by [ClickHouse syntax]: + /// Optional `WITH FILL` clause (ClickHouse extension) which specifies how to fill gaps. pub with_fill: Option, } @@ -2550,9 +2931,13 @@ impl fmt::Display for OrderByExpr { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `WITH FILL` options for ClickHouse `ORDER BY` expressions.
pub struct WithFill { + /// Optional lower bound expression for the fill range (`FROM `). pub from: Option, + /// Optional upper bound expression for the fill range (`TO `). pub to: Option, + /// Optional step expression specifying interpolation step (`STEP `). pub step: Option, } @@ -2579,15 +2964,20 @@ impl fmt::Display for WithFill { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An expression used by `WITH FILL`/`INTERPOLATE` to specify interpolation for a column. pub struct InterpolateExpr { + /// The column to interpolate. pub column: Ident, + /// Optional `AS ` expression specifying how to compute interpolated values. pub expr: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `INTERPOLATE` clause used with ClickHouse `WITH FILL` to compute missing values. pub struct Interpolate { + /// Optional list of interpolation expressions. pub exprs: Option>, } @@ -2604,10 +2994,11 @@ impl fmt::Display for InterpolateExpr { #[derive(Default, Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options for an `ORDER BY` expression (ASC/DESC and NULLS FIRST/LAST). pub struct OrderByOptions { - /// Optional `ASC` or `DESC` + /// Optional `ASC` (`Some(true)`) or `DESC` (`Some(false)`). pub asc: Option, - /// Optional `NULLS FIRST` or `NULLS LAST` + /// Optional `NULLS FIRST` (`Some(true)`) or `NULLS LAST` (`Some(false)`). 
pub nulls_first: Option, } @@ -2630,26 +3021,26 @@ impl fmt::Display for OrderByOptions { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents the different syntactic forms of `LIMIT` clauses. pub enum LimitClause { - /// Standard SQL syntax + /// Standard SQL `LIMIT` syntax (optionally `BY` and `OFFSET`). /// /// `LIMIT [BY ,,...] [OFFSET ]` LimitOffset { - /// `LIMIT { | ALL }` + /// `LIMIT { | ALL }` expression. limit: Option, - /// `OFFSET [ { ROW | ROWS } ]` + /// Optional `OFFSET` expression with optional `ROW(S)` keyword. offset: Option, - /// `BY { ,,... } }` - /// - /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/limit-by) + /// Optional `BY { ,... }` list used by some dialects (ClickHouse). limit_by: Vec, }, - /// [MySQL]-specific syntax; the order of expressions is reversed. - /// - /// `LIMIT , ` - /// - /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/select.html - OffsetCommaLimit { offset: Expr, limit: Expr }, + /// MySQL-specific syntax: `LIMIT , ` (order reversed). + OffsetCommaLimit { + /// The offset expression. + offset: Expr, + /// The limit expression. + limit: Expr, + }, } impl fmt::Display for LimitClause { @@ -2682,8 +3073,11 @@ impl fmt::Display for LimitClause { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `OFFSET` clause consisting of a value and a rows specifier. pub struct Offset { + /// The numeric expression following `OFFSET`. pub value: Expr, + /// Whether the offset uses `ROW`/`ROWS` or omits it. 
pub rows: OffsetRows, } @@ -2698,9 +3092,11 @@ impl fmt::Display for Offset { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OffsetRows { - /// Omitting ROW/ROWS is non-standard MySQL quirk. + /// Omitting `ROW`/`ROWS` entirely (non-standard MySQL quirk). None, + /// `ROW` keyword present. Row, + /// `ROWS` keyword present. Rows, } @@ -2734,45 +3130,71 @@ pub enum PipeOperator { /// Syntax: `|> LIMIT [OFFSET ]` /// /// See more at - Limit { expr: Expr, offset: Option }, + Limit { + /// The expression specifying the number of rows to return. + expr: Expr, + /// Optional offset expression provided inline with `LIMIT`. + offset: Option, + }, /// Filters the results of the input table. /// /// Syntax: `|> WHERE ` /// /// See more at - Where { expr: Expr }, + Where { + /// The filter expression. + expr: Expr, + }, /// `ORDER BY [ASC|DESC], ...` - OrderBy { exprs: Vec }, + OrderBy { + /// The ordering expressions. + exprs: Vec, + }, /// Produces a new table with the listed columns, similar to the outermost SELECT clause in a table subquery in standard syntax. /// /// Syntax `|> SELECT [[AS] alias], ...` /// /// See more at - Select { exprs: Vec }, + Select { + /// The select items to produce. + exprs: Vec, + }, /// Propagates the existing table and adds computed columns, similar to SELECT *, new_column in standard syntax. /// /// Syntax: `|> EXTEND [[AS] alias], ...` /// /// See more at - Extend { exprs: Vec }, + Extend { + /// Expressions defining added columns. + exprs: Vec, + }, /// Replaces the value of a column in the current table, similar to SELECT * REPLACE (expression AS column) in standard syntax. /// /// Syntax: `|> SET = , ...` /// /// See more at - Set { assignments: Vec }, + Set { + /// Assignments to apply (`column = expr`). + assignments: Vec, + }, /// Removes listed columns from the current table, similar to SELECT * EXCEPT (column) in standard syntax. 
/// /// Syntax: `|> DROP , ...` /// /// See more at - Drop { columns: Vec }, + Drop { + /// Columns to drop. + columns: Vec, + }, /// Introduces a table alias for the input table, similar to applying the AS alias clause on a table subquery in standard syntax. /// /// Syntax: `|> AS ` /// /// See more at - As { alias: Ident }, + As { + /// Alias to assign to the input table. + alias: Ident, + }, /// Performs aggregation on data across grouped rows or an entire table. /// /// Syntax: `|> AGGREGATE [[AS] alias], ...` @@ -2785,26 +3207,36 @@ pub enum PipeOperator { /// /// See more at Aggregate { + /// Expressions computed for each row prior to grouping. full_table_exprs: Vec, + /// Grouping expressions for aggregation. group_by_expr: Vec, }, /// Selects a random sample of rows from the input table. /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT) /// See more at - TableSample { sample: Box }, + TableSample { + /// Sampling clause describing the sample. + sample: Box, + }, /// Renames columns in the input table. /// /// Syntax: `|> RENAME old_name AS new_name, ...` /// /// See more at - Rename { mappings: Vec }, + Rename { + /// Mappings of old to new identifiers. + mappings: Vec, + }, /// Combines the input table with one or more tables using UNION. /// /// Syntax: `|> UNION [ALL|DISTINCT] (), (), ...` /// /// See more at Union { + /// Set quantifier (`ALL` or `DISTINCT`). set_quantifier: SetQuantifier, + /// The queries to combine with `UNION`. queries: Vec, }, /// Returns only the rows that are present in both the input table and the specified tables. @@ -2813,7 +3245,9 @@ pub enum PipeOperator { /// /// See more at Intersect { + /// Set quantifier for the `INTERSECT` operator. set_quantifier: SetQuantifier, + /// The queries to intersect. queries: Vec, }, /// Returns only the rows that are present in the input table but not in the specified tables. 
@@ -2822,7 +3256,9 @@ pub enum PipeOperator { /// /// See more at Except { + /// Set quantifier for the `EXCEPT` operator. set_quantifier: SetQuantifier, + /// The queries to exclude from the input set. queries: Vec, }, /// Calls a table function or procedure that returns a table. @@ -2831,7 +3267,9 @@ pub enum PipeOperator { /// /// See more at Call { + /// The function or procedure to call which returns a table. function: Function, + /// Optional alias for the result table. alias: Option, }, /// Pivots data from rows to columns. @@ -2840,9 +3278,13 @@ pub enum PipeOperator { /// /// See more at Pivot { + /// Aggregate functions to compute during pivot. aggregate_functions: Vec, + /// Column(s) that provide the pivot values. value_column: Vec, + /// The source of pivot values (literal list or subquery). value_source: PivotValueSource, + /// Optional alias for the output. alias: Option, }, /// The `UNPIVOT` pipe operator transforms columns into rows. @@ -2854,9 +3296,13 @@ pub enum PipeOperator { /// /// See more at Unpivot { + /// Output column that will receive the unpivoted value. value_column: Ident, + /// Column name holding the unpivoted column name. name_column: Ident, + /// Columns to unpivot. unpivot_columns: Vec, + /// Optional alias for the unpivot result. alias: Option, }, /// Joins the input table with another table. @@ -3007,9 +3453,13 @@ impl PipeOperator { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `FETCH` clause options. pub struct Fetch { + /// `WITH TIES` option is present. pub with_ties: bool, + /// `PERCENT` modifier is present. pub percent: bool, + /// Optional quantity expression (e.g. `FETCH FIRST 10 ROWS`). 
pub quantity: Option, } @@ -3028,9 +3478,13 @@ impl fmt::Display for Fetch { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `FOR ...` locking clause. pub struct LockClause { + /// The kind of lock requested (e.g. `SHARE`, `UPDATE`). pub lock_type: LockType, + /// Optional object name after `OF` (e.g. `FOR UPDATE OF t1`). pub of: Option, + /// Optional non-blocking behavior (`NOWAIT` / `SKIP LOCKED`). pub nonblock: Option, } @@ -3050,8 +3504,11 @@ impl fmt::Display for LockClause { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The lock type used in `FOR ` clauses (e.g. `FOR SHARE`, `FOR UPDATE`). pub enum LockType { + /// `SHARE` lock (shared lock). Share, + /// `UPDATE` lock (exclusive/update lock). Update, } @@ -3068,8 +3525,11 @@ impl fmt::Display for LockType { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Non-blocking lock options for `FOR ...` clauses. pub enum NonBlock { + /// `NOWAIT` — do not wait for the lock. Nowait, + /// `SKIP LOCKED` — skip rows that are locked. SkipLocked, } @@ -3086,17 +3546,25 @@ impl fmt::Display for NonBlock { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `ALL`, `DISTINCT`, or `DISTINCT ON (...)` modifiers for `SELECT` lists. 
pub enum Distinct { - /// DISTINCT + /// `ALL` (keep duplicate rows) + /// + /// Generally this is the default if omitted, but omission should be represented as + /// `None::>` + All, + + /// `DISTINCT` (remove duplicate rows) Distinct, - /// DISTINCT ON({column names}) + /// `DISTINCT ON (...)` (Postgres extension) On(Vec), } impl fmt::Display for Distinct { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Distinct::All => write!(f, "ALL"), Distinct::Distinct => write!(f, "DISTINCT"), Distinct::On(col_names) => { let col_names = display_comma_separated(col_names); @@ -3109,22 +3577,25 @@ impl fmt::Display for Distinct { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MSSQL `TOP` clause options. pub struct Top { /// SQL semantic equivalent of LIMIT but with same structure as FETCH. /// MSSQL only. pub with_ties: bool, - /// MSSQL only. + /// Apply `PERCENT` extension. pub percent: bool, + /// The optional quantity (expression or constant) following `TOP`. pub quantity: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Quantity used in a `TOP` clause: either an expression or a constant. pub enum TopQuantity { - // A parenthesized expression. MSSQL only. + /// A parenthesized expression (MSSQL syntax: `TOP (expr)`). Expr(Expr), - // An unparenthesized integer constant. + /// An unparenthesized integer constant: `TOP 10`. Constant(u64), } @@ -3148,13 +3619,15 @@ impl fmt::Display for Top { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An explicit `VALUES` clause and its rows. 
pub struct Values { - /// Was there an explicit ROWs keyword (MySQL)? + /// Was there an explicit `ROW` keyword (MySQL)? /// pub explicit_row: bool, - // MySql supports both VALUES and VALUE keywords. - // + /// `true` if `VALUE` (singular) keyword was used instead of `VALUES`. + /// pub value_keyword: bool, + /// The list of rows, each row is a list of expressions. pub rows: Vec>, } @@ -3179,10 +3652,15 @@ impl fmt::Display for Values { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `SELECT INTO` clause options. pub struct SelectInto { + /// `TEMPORARY` modifier. pub temporary: bool, + /// `UNLOGGED` modifier. pub unlogged: bool, + /// `TABLE` keyword present. pub table: bool, + /// Name of the target table. pub name: ObjectName, } @@ -3203,12 +3681,15 @@ impl fmt::Display for SelectInto { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modifiers used with `GROUP BY` such as `WITH ROLLUP` or `WITH CUBE`. pub enum GroupByWithModifier { + /// `WITH ROLLUP` modifier. Rollup, + /// `WITH CUBE` modifier. Cube, + /// `WITH TOTALS` modifier (ClickHouse). Totals, - /// Hive supports GROUP BY GROUPING SETS syntax. - /// e.g. GROUP BY year , month GROUPING SETS((year,month),(year),(month)) + /// Hive supports GROUPING SETS syntax, e.g. `GROUP BY GROUPING SETS(...)`. /// /// [Hive]: GroupingSets(Expr), @@ -3230,6 +3711,8 @@ impl fmt::Display for GroupByWithModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents the two syntactic forms that `GROUP BY` can take, including +/// `GROUP BY ALL` with optional modifiers and ordinary `GROUP BY `. 
pub enum GroupByExpr { /// ALL syntax of [Snowflake], [DuckDB] and [ClickHouse]. /// @@ -3241,8 +3724,7 @@ pub enum GroupByExpr { /// /// [ClickHouse]: All(Vec), - - /// Expressions + /// `GROUP BY ` with optional modifiers. Expressions(Vec, Vec), } @@ -3269,14 +3751,16 @@ impl fmt::Display for GroupByExpr { } } -/// FORMAT identifier or FORMAT NULL clause, specific to ClickHouse. +/// `FORMAT` identifier or `FORMAT NULL` clause, specific to ClickHouse. /// /// [ClickHouse]: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FormatClause { + /// The format identifier. Identifier(Ident), + /// `FORMAT NULL` clause. Null, } @@ -3296,7 +3780,9 @@ impl fmt::Display for FormatClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct InputFormatClause { + /// The format identifier. pub ident: Ident, + /// Optional format parameters. pub values: Vec, } @@ -3312,24 +3798,35 @@ impl fmt::Display for InputFormatClause { } } -/// FOR XML or FOR JSON clause, specific to MSSQL -/// (formats the output of a query as XML or JSON) +/// `FOR XML` or `FOR JSON` clause (MSSQL): formats the output of a query as XML or JSON. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ForClause { + /// `FOR BROWSE` clause. Browse, + /// `FOR JSON ...` clause and its options. Json { + /// JSON mode (`AUTO` or `PATH`). for_json: ForJson, + /// Optional `ROOT('...')` parameter. root: Option, + /// `INCLUDE_NULL_VALUES` flag. include_null_values: bool, + /// `WITHOUT_ARRAY_WRAPPER` flag. without_array_wrapper: bool, }, + /// `FOR XML ...` clause and its options. Xml { + /// XML mode (`RAW`, `AUTO`, `EXPLICIT`, `PATH`). 
for_xml: ForXml, + /// `ELEMENTS` flag. elements: bool, + /// `BINARY BASE64` flag. binary_base64: bool, + /// Optional `ROOT('...')` parameter. root: Option, + /// `TYPE` flag. r#type: bool, }, } @@ -3387,10 +3884,15 @@ impl fmt::Display for ForClause { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modes for `FOR XML` clause. pub enum ForXml { + /// `RAW` mode with optional root name: `RAW('root')`. Raw(Option), + /// `AUTO` mode. Auto, + /// `EXPLICIT` mode. Explicit, + /// `PATH` mode with optional root: `PATH('root')`. Path(Option), } @@ -3420,8 +3922,11 @@ impl fmt::Display for ForXml { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// Modes for `FOR JSON` clause. pub enum ForJson { + /// `AUTO` mode. Auto, + /// `PATH` mode. Path, } @@ -3486,8 +3991,11 @@ impl fmt::Display for JsonTableColumn { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// A nested column in a `JSON_TABLE` column list. pub struct JsonTableNestedColumn { - pub path: Value, + /// JSON path expression (must be a literal `Value`). + pub path: ValueWithSpan, + /// Columns extracted from the matched nested array. pub columns: Vec, } @@ -3518,7 +4026,7 @@ pub struct JsonTableNamedColumn { /// The type of the column to be extracted. pub r#type: DataType, /// The path to the column to be extracted. Must be a literal string. 
- pub path: Value, + pub path: ValueWithSpan, /// true if the column is a boolean set to true if the given path exists pub exists: bool, /// The empty handling clause of the column @@ -3552,9 +4060,13 @@ impl fmt::Display for JsonTableNamedColumn { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// Error/empty-value handling for `JSON_TABLE` columns. pub enum JsonTableColumnErrorHandling { + /// `NULL` — return NULL when the path does not match. Null, - Default(Value), + /// `DEFAULT ` — use the provided `Value` as a default. + Default(ValueWithSpan), + /// `ERROR` — raise an error. Error, } @@ -3613,10 +4125,15 @@ impl fmt::Display for OpenJsonTableColumn { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Mode of BigQuery value tables, e.g. `AS STRUCT` or `AS VALUE`. pub enum ValueTableMode { + /// `AS STRUCT` AsStruct, + /// `AS VALUE` AsValue, + /// `DISTINCT AS STRUCT` DistinctAsStruct, + /// `DISTINCT AS VALUE` DistinctAsValue, } @@ -3719,10 +4236,14 @@ impl fmt::Display for XmlTableColumn { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// Argument passed in the `XMLTABLE PASSING` clause. pub struct XmlPassingArgument { + /// Expression to pass to the XML table. pub expr: Expr, + /// Optional alias for the argument. pub alias: Option, - pub by_value: bool, // True if BY VALUE is specified + /// `true` if `BY VALUE` is specified for the argument. 
+ pub by_value: bool, } impl fmt::Display for XmlPassingArgument { @@ -3742,7 +4263,9 @@ impl fmt::Display for XmlPassingArgument { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// The PASSING clause for `XMLTABLE`. pub struct XmlPassingClause { + /// The list of passed arguments. pub arguments: Vec, } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index b453422c22..ad3c8a3355 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -28,25 +28,26 @@ use core::iter; use crate::tokenizer::Span; use super::{ - dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, + comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget, AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements, - ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, + ConflictTarget, ConnectByKind, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, Expr, - ExprWithAlias, Fetch, FromTable, Function, FunctionArg, FunctionArgExpr, + ExprWithAlias, Fetch, ForValues, FromTable, Function, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, IfStatement, IlikeSelectItem, IndexColumn, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, LimitClause, - MatchRecognizePattern, Measure, MergeAction, MergeClause, MergeInsertExpr, MergeInsertKind, - NamedParenthesizedList, NamedWindowDefinition, ObjectName, ObjectNamePart, Offset, OnConflict, - OnConflictAction, OnInsert, 
OpenStatement, OrderBy, OrderByExpr, OrderByKind, OutputClause, - Partition, PivotValueSource, ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, - ReferentialAction, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, - SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, - TableAliasColumnDef, TableConstraint, TableFactor, TableObject, TableOptionsClustered, - TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, ViewColumnDef, WhileStatement, - WildcardAdditionalOptions, With, WithFill, + MatchRecognizePattern, Measure, Merge, MergeAction, MergeClause, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, NamedParenthesizedList, NamedWindowDefinition, ObjectName, + ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OpenStatement, OrderBy, + OrderByExpr, OrderByKind, OutputClause, Partition, PartitionBoundValue, PivotValueSource, + ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, ReferentialAction, + RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, + SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, + TableConstraint, TableFactor, TableObject, TableOptionsClustered, TableWithJoins, Update, + UpdateTableFromKind, Use, Values, ViewColumnDef, WhileStatement, WildcardAdditionalOptions, + With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. 
@@ -252,6 +253,7 @@ impl Spanned for Values { /// - [Statement::CreateSecret] /// - [Statement::CreateRole] /// - [Statement::AlterType] +/// - [Statement::AlterOperator] /// - [Statement::AlterRole] /// - [Statement::AttachDatabase] /// - [Statement::AttachDuckDBDatabase] @@ -302,6 +304,7 @@ impl Spanned for Values { /// - [Statement::CreateSequence] /// - [Statement::CreateType] /// - [Statement::Pragma] +/// - [Statement::Lock] /// - [Statement::LockTables] /// - [Statement::UnlockTables] /// - [Statement::Unload] @@ -401,6 +404,9 @@ impl Spanned for Statement { ), // These statements need to be implemented Statement::AlterType { .. } => Span::empty(), + Statement::AlterOperator { .. } => Span::empty(), + Statement::AlterOperatorFamily { .. } => Span::empty(), + Statement::AlterOperatorClass { .. } => Span::empty(), Statement::AlterRole { .. } => Span::empty(), Statement::AlterSession { .. } => Span::empty(), Statement::AttachDatabase { .. } => Span::empty(), @@ -451,25 +457,13 @@ impl Spanned for Statement { Statement::Explain { .. } => Span::empty(), Statement::Savepoint { .. } => Span::empty(), Statement::ReleaseSavepoint { .. } => Span::empty(), - Statement::Merge { - merge_token, - into: _, - table: _, - source: _, - on, - clauses, - output, - } => union_spans( - [merge_token.0.span, on.span()] - .into_iter() - .chain(clauses.iter().map(Spanned::span)) - .chain(output.iter().map(Spanned::span)), - ), + Statement::Merge(merge) => merge.span(), Statement::Cache { .. } => Span::empty(), Statement::UNCache { .. } => Span::empty(), Statement::CreateSequence { .. } => Span::empty(), Statement::CreateType { .. } => Span::empty(), Statement::Pragma { .. } => Span::empty(), + Statement::Lock(_) => Span::empty(), Statement::LockTables { .. } => Span::empty(), Statement::UnlockTables => Span::empty(), Statement::Unload { .. } => Span::empty(), @@ -489,7 +483,9 @@ impl Spanned for Statement { Statement::UNLISTEN { .. } => Span::empty(), Statement::RenameTable { .. 
} => Span::empty(), Statement::RaisError { .. } => Span::empty(), + Statement::Throw(_) => Span::empty(), Statement::Print { .. } => Span::empty(), + Statement::WaitFor(_) => Span::empty(), Statement::Return { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) => Span::empty(), Statement::ExportData(ExportData { @@ -544,6 +540,7 @@ impl Spanned for CreateTable { transient: _, // bool volatile: _, // bool iceberg: _, // bool, Snowflake specific + snapshot: _, // bool, BigQuery specific name, columns, constraints, @@ -557,13 +554,15 @@ impl Spanned for CreateTable { clone, comment: _, // todo, no span on_commit: _, - on_cluster: _, // todo, clickhouse specific - primary_key: _, // todo, clickhouse specific - order_by: _, // todo, clickhouse specific - partition_by: _, // todo, BigQuery specific - cluster_by: _, // todo, BigQuery specific - clustered_by: _, // todo, Hive specific - inherits: _, // todo, PostgreSQL specific + on_cluster: _, // todo, clickhouse specific + primary_key: _, // todo, clickhouse specific + order_by: _, // todo, clickhouse specific + partition_by: _, // todo, BigQuery specific + cluster_by: _, // todo, BigQuery specific + clustered_by: _, // todo, Hive specific + inherits: _, // todo, PostgreSQL specific + partition_of, + for_values, strict: _, // bool copy_grants: _, // bool enable_schema_evolution: _, // bool @@ -573,6 +572,7 @@ impl Spanned for CreateTable { default_ddl_collation: _, // string, no span with_aggregation_policy: _, // todo, Snowflake specific with_row_access_policy: _, // todo, Snowflake specific + with_storage_lifecycle_policy: _, // todo, Snowflake specific with_tags: _, // todo, Snowflake specific external_volume: _, // todo, Snowflake specific base_location: _, // todo, Snowflake specific @@ -586,6 +586,10 @@ impl Spanned for CreateTable { refresh_mode: _, initialize: _, require_user: _, + diststyle: _, + distkey: _, + sortkey: _, + backup: _, } = self; union_spans( @@ -594,7 +598,9 @@ impl Spanned for 
CreateTable { .chain(columns.iter().map(|i| i.span())) .chain(constraints.iter().map(|i| i.span())) .chain(query.iter().map(|i| i.span())) - .chain(clone.iter().map(|i| i.span())), + .chain(clone.iter().map(|i| i.span())) + .chain(partition_of.iter().map(|i| i.span())) + .chain(for_values.iter().map(|i| i.span())), ) } } @@ -628,6 +634,35 @@ impl Spanned for TableConstraint { TableConstraint::Check(constraint) => constraint.span(), TableConstraint::Index(constraint) => constraint.span(), TableConstraint::FulltextOrSpatial(constraint) => constraint.span(), + TableConstraint::PrimaryKeyUsingIndex(constraint) + | TableConstraint::UniqueUsingIndex(constraint) => constraint.span(), + } + } +} + +impl Spanned for PartitionBoundValue { + fn span(&self) -> Span { + match self { + PartitionBoundValue::Expr(expr) => expr.span(), + // MINVALUE and MAXVALUE are keywords without tracked spans + PartitionBoundValue::MinValue => Span::empty(), + PartitionBoundValue::MaxValue => Span::empty(), + } + } +} + +impl Spanned for ForValues { + fn span(&self) -> Span { + match self { + ForValues::In(exprs) => union_spans(exprs.iter().map(|e| e.span())), + ForValues::From { from, to } => union_spans( + from.iter() + .map(|v| v.span()) + .chain(to.iter().map(|v| v.span())), + ), + // WITH (MODULUS n, REMAINDER r) - u64 values have no spans + ForValues::With { .. 
} => Span::empty(), + ForValues::Default => Span::empty(), } } } @@ -818,7 +853,9 @@ impl Spanned for ConstraintCharacteristics { impl Spanned for Analyze { fn span(&self) -> Span { union_spans( - core::iter::once(self.table_name.span()) + self.table_name + .iter() + .map(|t| t.span()) .chain( self.partitions .iter() @@ -871,11 +908,13 @@ impl Spanned for Delete { fn span(&self) -> Span { let Delete { delete_token, + optimizer_hints: _, tables, from, using, selection, returning, + output, order_by, limit, } = self; @@ -893,6 +932,7 @@ impl Spanned for Delete { ) .chain(selection.iter().map(|i| i.span())) .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))) + .chain(output.iter().map(|i| i.span())) .chain(order_by.iter().map(|i| i.span())) .chain(limit.iter().map(|i| i.span())), ), @@ -904,11 +944,13 @@ impl Spanned for Update { fn span(&self) -> Span { let Update { update_token, + optimizer_hints: _, table, assignments, from, selection, returning, + output, or: _, limit, } = self; @@ -920,11 +962,23 @@ impl Spanned for Update { .chain(from.iter().map(|i| i.span())) .chain(selection.iter().map(|i| i.span())) .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))) + .chain(output.iter().map(|i| i.span())) .chain(limit.iter().map(|i| i.span())), ) } } +impl Spanned for Merge { + fn span(&self) -> Span { + union_spans( + [self.merge_token.0.span, self.on.span()] + .into_iter() + .chain(self.clauses.iter().map(Spanned::span)) + .chain(self.output.iter().map(Spanned::span)), + ) + } +} + impl Spanned for FromTable { fn span(&self) -> Span { match self { @@ -1087,6 +1141,8 @@ impl Spanned for AlterTableOperation { AlterTableOperation::EnableReplicaRule { name } => name.span, AlterTableOperation::EnableReplicaTrigger { name } => name.span, AlterTableOperation::EnableRowLevelSecurity => Span::empty(), + AlterTableOperation::ForceRowLevelSecurity => Span::empty(), + AlterTableOperation::NoForceRowLevelSecurity => Span::empty(), 
AlterTableOperation::EnableRule { name } => name.span, AlterTableOperation::EnableTrigger { name } => name.span, AlterTableOperation::RenamePartitions { @@ -1143,9 +1199,10 @@ impl Spanned for AlterTableOperation { AlterTableOperation::OwnerTo { .. } => Span::empty(), AlterTableOperation::ClusterBy { exprs } => union_spans(exprs.iter().map(|e| e.span())), AlterTableOperation::DropClusteringKey => Span::empty(), + AlterTableOperation::AlterSortKey { .. } => Span::empty(), AlterTableOperation::SuspendRecluster => Span::empty(), AlterTableOperation::ResumeRecluster => Span::empty(), - AlterTableOperation::Refresh => Span::empty(), + AlterTableOperation::Refresh { .. } => Span::empty(), AlterTableOperation::Suspend => Span::empty(), AlterTableOperation::Resume => Span::empty(), AlterTableOperation::Algorithm { .. } => Span::empty(), @@ -1254,6 +1311,7 @@ impl Spanned for Insert { fn span(&self) -> Span { let Insert { insert_token, + optimizer_hints: _, or: _, // enum, sqlite specific ignore: _, // bool into: _, // bool @@ -1267,25 +1325,31 @@ impl Spanned for Insert { has_table_keyword: _, // bool on, returning, + output, replace_into: _, // bool priority: _, // todo, mysql specific insert_alias: _, // todo, mysql specific assignments, - settings: _, // todo, clickhouse specific - format_clause: _, // todo, clickhouse specific + settings: _, // todo, clickhouse specific + format_clause: _, // todo, clickhouse specific + multi_table_insert_type: _, // snowflake multi-table insert + multi_table_into_clauses: _, // snowflake multi-table insert + multi_table_when_clauses: _, // snowflake multi-table insert + multi_table_else_clause: _, // snowflake multi-table insert } = self; union_spans( core::iter::once(insert_token.0.span) .chain(core::iter::once(table.span())) - .chain(table_alias.as_ref().map(|i| i.span)) - .chain(columns.iter().map(|i| i.span)) + .chain(table_alias.iter().map(|k| k.alias.span)) + .chain(columns.iter().map(|i| i.span())) 
.chain(source.as_ref().map(|q| q.span())) .chain(assignments.iter().map(|i| i.span())) .chain(partitioned.iter().flat_map(|i| i.iter().map(|k| k.span()))) .chain(after_columns.iter().map(|i| i.span)) .chain(on.as_ref().map(|i| i.span())) - .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))), + .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))) + .chain(output.iter().map(|i| i.span())), ) } } @@ -1511,6 +1575,7 @@ impl Spanned for Expr { kind: _, expr, data_type: _, + array: _, format: _, } => expr.span(), Expr::AtTimeZone { @@ -1744,6 +1809,7 @@ impl Spanned for JsonPathElem { JsonPathElem::Dot { .. } => Span::empty(), JsonPathElem::Bracket { key } => key.span(), JsonPathElem::AllElements => Span::empty(), + JsonPathElem::ColonBracket { key } => key.span(), } } } @@ -1781,6 +1847,7 @@ impl Spanned for WildcardAdditionalOptions { opt_except, opt_replace, opt_rename, + opt_alias, } = self; union_spans( @@ -1789,7 +1856,8 @@ impl Spanned for WildcardAdditionalOptions { .chain(opt_exclude.as_ref().map(|i| i.span())) .chain(opt_rename.as_ref().map(|i| i.span())) .chain(opt_replace.as_ref().map(|i| i.span())) - .chain(opt_except.as_ref().map(|i| i.span())), + .chain(opt_except.as_ref().map(|i| i.span())) + .chain(opt_alias.as_ref().map(|i| i.span)), ) } } @@ -1804,8 +1872,8 @@ impl Spanned for IlikeSelectItem { impl Spanned for ExcludeSelectItem { fn span(&self) -> Span { match self { - ExcludeSelectItem::Single(ident) => ident.span, - ExcludeSelectItem::Multiple(vec) => union_spans(vec.iter().map(|i| i.span)), + ExcludeSelectItem::Single(name) => name.span(), + ExcludeSelectItem::Multiple(vec) => union_spans(vec.iter().map(|i| i.span())), } } } @@ -1887,6 +1955,7 @@ impl Spanned for TableFactor { lateral: _, subquery, alias, + sample: _, } => subquery .span() .union_opt(&alias.as_ref().map(|alias| alias.span())), @@ -2086,6 +2155,7 @@ impl Spanned for FunctionArg { /// /// Missing spans: /// - [FunctionArgExpr::Wildcard] +/// - 
[FunctionArgExpr::WildcardWithOptions] impl Spanned for FunctionArgExpr { fn span(&self) -> Span { match self { @@ -2094,6 +2164,7 @@ impl Spanned for FunctionArgExpr { union_spans(object_name.0.iter().map(|i| i.span())) } FunctionArgExpr::Wildcard => Span::empty(), + FunctionArgExpr::WildcardWithOptions(_) => Span::empty(), } } } @@ -2123,13 +2194,6 @@ impl Spanned for ValueWithSpan { } } -/// The span is stored in the `ValueWrapper` struct -impl Spanned for Value { - fn span(&self) -> Span { - Span::empty() // # todo: Value needs to store spans before this is possible - } -} - impl Spanned for Join { fn span(&self) -> Span { let Join { @@ -2204,8 +2268,10 @@ impl Spanned for Select { fn span(&self) -> Span { let Select { select_token, + optimizer_hints: _, distinct: _, // todo - top: _, // todo, mysql specific + select_modifiers: _, + top: _, // todo, mysql specific projection, exclude: _, into, @@ -2235,28 +2301,34 @@ impl Spanned for Select { .chain(lateral_views.iter().map(|item| item.span())) .chain(prewhere.iter().map(|item| item.span())) .chain(selection.iter().map(|item| item.span())) + .chain(connect_by.iter().map(|item| item.span())) .chain(core::iter::once(group_by.span())) .chain(cluster_by.iter().map(|item| item.span())) .chain(distribute_by.iter().map(|item| item.span())) .chain(sort_by.iter().map(|item| item.span())) .chain(having.iter().map(|item| item.span())) .chain(named_window.iter().map(|item| item.span())) - .chain(qualify.iter().map(|item| item.span())) - .chain(connect_by.iter().map(|item| item.span())), + .chain(qualify.iter().map(|item| item.span())), ) } } -impl Spanned for ConnectBy { +impl Spanned for ConnectByKind { fn span(&self) -> Span { - let ConnectBy { - condition, - relationships, - } = self; - - union_spans( - core::iter::once(condition.span()).chain(relationships.iter().map(|item| item.span())), - ) + match self { + ConnectByKind::ConnectBy { + connect_token, + nocycle: _, + relationships, + } => union_spans( + 
core::iter::once(connect_token.0.span()) + .chain(relationships.last().iter().map(|item| item.span())), + ), + ConnectByKind::StartWith { + start_token, + condition, + } => union_spans([start_token.0.span(), condition.span()].into_iter()), + } } } @@ -2318,6 +2390,7 @@ impl Spanned for TableObject { union_spans(segments.iter().map(|i| i.span())) } TableObject::TableFunction(func) => func.span(), + TableObject::TableQuery(query) => query.span(), } } } @@ -2428,12 +2501,7 @@ impl Spanned for MergeAction { fn span(&self) -> Span { match self { MergeAction::Insert(expr) => expr.span(), - MergeAction::Update { - update_token, - assignments, - } => union_spans( - core::iter::once(update_token.0.span).chain(assignments.iter().map(Spanned::span)), - ), + MergeAction::Update(expr) => expr.span(), MergeAction::Delete { delete_token } => delete_token.0.span, } } @@ -2451,7 +2519,19 @@ impl Spanned for MergeInsertExpr { }, ] .into_iter() - .chain(self.columns.iter().map(|i| i.span)), + .chain(self.insert_predicate.iter().map(Spanned::span)) + .chain(self.columns.iter().map(|i| i.span())), + ) + } +} + +impl Spanned for MergeUpdateExpr { + fn span(&self) -> Span { + union_spans( + core::iter::once(self.update_token.0.span) + .chain(self.assignments.iter().map(Spanned::span)) + .chain(self.update_predicate.iter().map(Spanned::span)) + .chain(self.delete_predicate.iter().map(Spanned::span)), ) } } @@ -2479,8 +2559,15 @@ impl Spanned for OutputClause { } } +impl Spanned for comments::CommentWithSpan { + fn span(&self) -> Span { + self.span + } +} + #[cfg(test)] pub mod tests { + use crate::ast::Value; use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect}; use crate::parser::Parser; use crate::tokenizer::{Location, Span}; @@ -2763,7 +2850,7 @@ WHERE id = 1 UPDATE SET target_table.description = source_table.description WHEN MATCHED AND target_table.x != 'X' THEN DELETE - WHEN NOT MATCHED AND 1 THEN INSERT (product, quantity) ROW + WHEN NOT MATCHED AND 1 THEN INSERT 
(product, quantity) ROW "#; let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); @@ -2775,15 +2862,16 @@ WHERE id = 1 assert_eq!(stmt_span.end, (16, 67).into()); // ~ individual tokens within the statement - let Statement::Merge { + let Statement::Merge(Merge { merge_token, + optimizer_hints: _, into: _, table: _, source: _, on: _, clauses, output, - } = &r[0] + }) = &r[0] else { panic!("not a MERGE statement"); }; @@ -2821,10 +2909,12 @@ WHERE id = 1 clauses[1].when_token.0.span, Span::new(Location::new(12, 17), Location::new(12, 21)) ); - if let MergeAction::Update { + if let MergeAction::Update(MergeUpdateExpr { update_token, assignments: _, - } = &clauses[1].action + update_predicate: _, + delete_predicate: _, + }) = &clauses[1].action { assert_eq!( update_token.0.span, @@ -2897,7 +2987,7 @@ WHERE id = 1 ); // ~ individual tokens within the statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Returning { returning_token, .. }) = output @@ -2931,7 +3021,7 @@ WHERE id = 1 ); // ~ individual tokens within the statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Output { output_token, .. 
}) = output { assert_eq!( output_token.0.span, @@ -2944,4 +3034,44 @@ WHERE id = 1 panic!("not a MERGE statement"); }; } + + #[test] + fn test_merge_statement_spans_with_update_predicates() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN MATCHED THEN + UPDATE set a.x = a.x + b.x + WHERE b.x != 2 + DELETE WHERE a.x <> 3"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + stmt_span, + Span::new(Location::new(2, 8), Location::new(6, 36)) + ); + } + + #[test] + fn test_merge_statement_spans_with_insert_predicate() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN NOT MATCHED THEN + INSERT VALUES (b.x, b.y) WHERE b.x != 2 +-- qed +"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + stmt_span, + Span::new(Location::new(2, 8), Location::new(4, 52)) + ); + } } diff --git a/src/ast/table_constraints.rs b/src/ast/table_constraints.rs index ddf0c12539..9ba196a81e 100644 --- a/src/ast/table_constraints.rs +++ b/src/ast/table_constraints.rs @@ -101,6 +101,22 @@ pub enum TableConstraint { /// [1]: https://dev.mysql.com/doc/refman/8.0/en/fulltext-natural-language.html /// [2]: https://dev.mysql.com/doc/refman/8.0/en/spatial-types.html FulltextOrSpatial(FullTextOrSpatialConstraint), + /// PostgreSQL [definition][1] for promoting an existing unique index to a + /// `PRIMARY KEY` constraint: + /// + /// `[ CONSTRAINT constraint_name ] PRIMARY KEY USING INDEX index_name + /// [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` + /// + /// [1]: https://www.postgresql.org/docs/current/sql-altertable.html + PrimaryKeyUsingIndex(ConstraintUsingIndex), + /// PostgreSQL [definition][1] for promoting an existing unique index to a + /// 
`UNIQUE` constraint: + /// + /// `[ CONSTRAINT constraint_name ] UNIQUE USING INDEX index_name + /// [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` + /// + /// [1]: https://www.postgresql.org/docs/current/sql-altertable.html + UniqueUsingIndex(ConstraintUsingIndex), } impl From for TableConstraint { @@ -148,6 +164,8 @@ impl fmt::Display for TableConstraint { TableConstraint::Check(constraint) => constraint.fmt(f), TableConstraint::Index(constraint) => constraint.fmt(f), TableConstraint::FulltextOrSpatial(constraint) => constraint.fmt(f), + TableConstraint::PrimaryKeyUsingIndex(c) => c.fmt_with_keyword(f, "PRIMARY KEY"), + TableConstraint::UniqueUsingIndex(c) => c.fmt_with_keyword(f, "UNIQUE"), } } } @@ -155,10 +173,13 @@ impl fmt::Display for TableConstraint { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A `CHECK` constraint (`[ CONSTRAINT ] CHECK () [[NOT] ENFORCED]`). pub struct CheckConstraint { + /// Optional constraint name. pub name: Option, + /// The boolean expression the CHECK constraint enforces. pub expr: Box, - /// MySQL-specific syntax + /// MySQL-specific `ENFORCED` / `NOT ENFORCED` flag. /// pub enforced: Option, } @@ -197,16 +218,24 @@ impl crate::ast::Spanned for CheckConstraint { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ForeignKeyConstraint { + /// Optional constraint name. pub name: Option, - /// MySQL-specific field + /// MySQL-specific index name associated with the foreign key. /// pub index_name: Option, + /// Columns in the local table that participate in the foreign key. pub columns: Vec, + /// Referenced foreign table name. pub foreign_table: ObjectName, + /// Columns in the referenced table. pub referred_columns: Vec, + /// Action to perform `ON DELETE`. 
pub on_delete: Option, + /// Action to perform `ON UPDATE`. pub on_update: Option, + /// Optional `MATCH` kind (FULL | PARTIAL | SIMPLE). pub match_kind: Option, + /// Optional characteristics (e.g., `DEFERRABLE`). pub characteristics: Option, } @@ -344,6 +373,7 @@ pub struct IndexConstraint { /// Referred column identifier list. pub columns: Vec, /// Optional index options such as `USING`; see [`IndexOption`]. + /// Options applied to the index (e.g., `COMMENT`, `WITH` options). pub index_options: Vec, } @@ -413,7 +443,9 @@ pub struct PrimaryKeyConstraint { pub index_type: Option, /// Identifiers of the columns that form the primary key. pub columns: Vec, + /// Optional index options such as `USING`. pub index_options: Vec, + /// Optional characteristics like `DEFERRABLE`. pub characteristics: Option, } @@ -458,6 +490,7 @@ impl crate::ast::Spanned for PrimaryKeyConstraint { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Unique constraint definition. pub struct UniqueConstraint { /// Constraint name. /// @@ -473,7 +506,9 @@ pub struct UniqueConstraint { pub index_type: Option, /// Identifiers of the columns that are unique. pub columns: Vec, + /// Optional index options such as `USING`. pub index_options: Vec, + /// Optional characteristics like `DEFERRABLE`. pub characteristics: Option, /// Optional Postgres nulls handling: `[ NULLS [ NOT ] DISTINCT ]` pub nulls_distinct: NullsDistinctOption, @@ -518,3 +553,53 @@ impl crate::ast::Spanned for UniqueConstraint { ) } } + +/// PostgreSQL constraint that promotes an existing unique index to a table constraint. 
+/// +/// `[ CONSTRAINT constraint_name ] { UNIQUE | PRIMARY KEY } USING INDEX index_name +/// [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` +/// +/// See <https://www.postgresql.org/docs/current/sql-altertable.html> +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ConstraintUsingIndex { + /// Optional constraint name. + pub name: Option, + /// The name of the existing unique index to promote. + pub index_name: Ident, + /// Optional characteristics like `DEFERRABLE`. + pub characteristics: Option, +} + +impl ConstraintUsingIndex { + /// Format as `[CONSTRAINT name] keyword USING INDEX index_name [characteristics]`, where `keyword` is the caller-supplied constraint keyword (e.g. `PRIMARY KEY` or `UNIQUE`). + pub fn fmt_with_keyword(&self, f: &mut fmt::Formatter, keyword: &str) -> fmt::Result { + use crate::ast::ddl::{display_constraint_name, display_option_spaced}; + write!( + f, + "{}{} USING INDEX {}", + display_constraint_name(&self.name), + keyword, + self.index_name, + )?; + write!(f, "{}", display_option_spaced(&self.characteristics))?; + Ok(()) + } +} + +impl crate::ast::Spanned for ConstraintUsingIndex { + fn span(&self) -> Span { + let start = self + .name + .as_ref() + .map(|i| i.span) + .unwrap_or(self.index_name.span); + let end = self + .characteristics + .as_ref() + .map(|c| c.span()) + .unwrap_or(self.index_name.span); + start.union(&end) + } +} diff --git a/src/ast/trigger.rs b/src/ast/trigger.rs index 2c64e42393..8c189a3378 100644 --- a/src/ast/trigger.rs +++ b/src/ast/trigger.rs @@ -23,7 +23,9 @@ use super::*; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerObject { + /// The trigger fires once for each row affected by the triggering event Row, + /// The trigger fires once for the triggering SQL statement Statement, } @@ -36,12 +38,14 @@ impl fmt::Display for TriggerObject { } } -/// This clause indicates whether the following relation name 
is for the before-image transition relation or the after-image transition relation #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// This clause indicates whether the following relation name is for the before-image transition relation or the after-image transition relation pub enum TriggerReferencingType { + /// The transition relation containing the old rows affected by the triggering statement OldTable, + /// The transition relation containing the new rows affected by the triggering statement NewTable, } @@ -59,8 +63,11 @@ impl fmt::Display for TriggerReferencingType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TriggerReferencing { + /// The referencing type (`OLD TABLE` or `NEW TABLE`). pub refer_type: TriggerReferencingType, + /// True if the `AS` keyword is present in the referencing clause. pub is_as: bool, + /// The transition relation name provided by the referencing clause. 
pub transition_relation_name: ObjectName, } @@ -81,9 +88,13 @@ impl fmt::Display for TriggerReferencing { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerEvent { + /// Trigger on INSERT event Insert, + /// Trigger on UPDATE event, with optional list of columns Update(Vec), + /// Trigger on DELETE event Delete, + /// Trigger on TRUNCATE event Truncate, } @@ -110,9 +121,13 @@ impl fmt::Display for TriggerEvent { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerPeriod { + /// The trigger period is specified with the `FOR` keyword For, + /// The trigger fires after the triggering event After, + /// The trigger fires before the triggering event Before, + /// The trigger fires instead of the triggering event InsteadOf, } @@ -132,7 +147,9 @@ impl fmt::Display for TriggerPeriod { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerExecBodyType { + /// Execute a function Function, + /// Execute a procedure Procedure, } @@ -149,7 +166,9 @@ impl fmt::Display for TriggerExecBodyType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TriggerExecBody { + /// Whether the body is a `FUNCTION` or `PROCEDURE` invocation. pub exec_type: TriggerExecBodyType, + /// Description of the function/procedure to execute. 
pub func_desc: FunctionDesc, } diff --git a/src/ast/value.rs b/src/ast/value.rs index fdfa6a6748..5f069f36cc 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -18,7 +18,10 @@ #[cfg(not(feature = "std"))] use alloc::string::String; -use core::fmt; +use core::{ + fmt, + ops::{Deref, DerefMut}, +}; #[cfg(feature = "bigdecimal")] use bigdecimal::BigDecimal; @@ -64,11 +67,18 @@ use sqlparser_derive::{Visit, VisitMut}; /// // convert back to `Value` /// let value: Value = value_with_span.into(); /// ``` +/// A `Value` paired with its source `Span` location. #[derive(Debug, Clone, Eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr( + feature = "visitor", + derive(Visit, VisitMut), + visit(with = "visit_value") +)] pub struct ValueWithSpan { + /// The wrapped `Value`. pub value: Value, + /// The source `Span` covering the token(s) that produced the value. pub span: Span, } @@ -108,26 +118,37 @@ impl From for Value { } } +impl Deref for ValueWithSpan { + type Target = Value; + + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl DerefMut for ValueWithSpan { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.value + } +} + /// Primitive SQL values such as number and string #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr( - feature = "visitor", - derive(Visit, VisitMut), - visit(with = "visit_value") -)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] Number(String, bool), #[cfg(feature = "bigdecimal")] - // HINT: use `test_utils::number` to make an instance of - // Value::Number This might help if you your tests pass locally - // but fail on CI with the `--all-features` flag enabled + /// HINT: use `test_utils::number` to make an instance of + /// Value::Number. This might help if your 
tests pass locally + /// but fail on CI with the `--all-features` flag enabled + /// Numeric literal (uses `BigDecimal` when the `bigdecimal` feature is enabled). Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), - // $$string value$$ (postgres syntax) + /// Dollar-quoted string literal, e.g. `$$...$$` or `$tag$...$tag$` (Postgres syntax). DollarQuotedString(DollarQuotedString), /// Triple single quoted strings: Example '''abc''' /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) @@ -167,9 +188,16 @@ pub enum Value { TripleDoubleQuotedRawStringLiteral(String), /// N'string value' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// X'hex value' HexStringLiteral(String), + /// Double quoted string literal, e.g. `"abc"`. DoubleQuotedString(String), /// Boolean value true or false Boolean(bool), @@ -207,14 +235,18 @@ impl Value { | Value::NationalStringLiteral(s) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), + Value::QuoteDelimitedStringLiteral(s) => Some(s.value), + Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value), _ => None, } } + /// Attach the provided `span` to this `Value` and return `ValueWithSpan`. pub fn with_span(self, span: Span) -> ValueWithSpan { ValueWithSpan { value: self, span } } + /// Convenience for attaching an empty span to this `Value`. 
pub fn with_empty_span(self) -> ValueWithSpan { self.with_span(Span::empty()) } @@ -242,6 +274,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), + Value::QuoteDelimitedStringLiteral(v) => v.fmt(f), + Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), @@ -258,11 +292,14 @@ impl fmt::Display for Value { } } +/// A dollar-quoted string literal, e.g. `$$...$$` or `$tag$...$tag$`. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DollarQuotedString { + /// Inner string contents. pub value: String, + /// Optional tag used in the opening/closing delimiter. pub tag: Option, } @@ -279,59 +316,124 @@ impl fmt::Display for DollarQuotedString { } } +/// A quote delimited string literal, e.g. `Q'_abc_'`. +/// +/// See [Value::QuoteDelimitedStringLiteral] and/or +/// [Value::NationalQuoteDelimitedStringLiteral]. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct QuoteDelimitedString { + /// the quote start character; i.e. the character _after_ the opening `Q'` + pub start_quote: char, + /// the string literal value itself + pub value: String, + /// the quote end character; i.e. 
the character _before_ the closing `'` + pub end_quote: char, +} + +impl fmt::Display for QuoteDelimitedString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote) + } +} + +/// Represents the date/time fields used by functions like `EXTRACT`. +/// +/// Each variant corresponds to a supported date/time part (for example +/// `YEAR`, `MONTH`, `DAY`, etc.). The `Custom` variant allows arbitrary +/// identifiers (e.g. dialect-specific abbreviations). #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DateTimeField { + /// `YEAR` Year, + /// `YEARS` (plural form) Years, + /// `MONTH` Month, + /// `MONTHS` (plural form) Months, - /// Week optionally followed by a WEEKDAY. - /// - /// ```sql - /// WEEK(MONDAY) - /// ``` + /// `WEEK`, optionally followed by a weekday, e.g. `WEEK(MONDAY)`. 
/// /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#extract) Week(Option), + /// `WEEKS` (plural form) Weeks, + /// `DAY` Day, + /// `DAYOFWEEK` DayOfWeek, + /// `DAYOFYEAR` DayOfYear, + /// `DAYS` (plural form) Days, + /// `DATE` Date, + /// `DATETIME` Datetime, + /// `HOUR` Hour, + /// `HOURS` (plural form) Hours, + /// `MINUTE` Minute, + /// `MINUTES` (plural form) Minutes, + /// `SECOND` Second, + /// `SECONDS` (plural form) Seconds, + /// `CENTURY` Century, + /// `DECADE` Decade, + /// `DOW` (day of week short form) Dow, + /// `DOY` (day of year short form) Doy, + /// `EPOCH` Epoch, + /// `ISODOW` Isodow, - IsoWeek, + /// `ISOYEAR` Isoyear, + /// `ISOWEEK` + IsoWeek, + /// `JULIAN` Julian, + /// `MICROSECOND` Microsecond, + /// `MICROSECONDS` (plural form) Microseconds, + /// `MILLENIUM` (alternate spelling) Millenium, + /// `MILLENNIUM` (alternate spelling) Millennium, + /// `MILLISECOND` Millisecond, + /// `MILLISECONDS` (plural form) Milliseconds, + /// `NANOSECOND` Nanosecond, + /// `NANOSECONDS` (plural form) Nanoseconds, + /// `QUARTER` Quarter, + /// `TIME` Time, + /// `TIMEZONE` Timezone, + /// `TIMEZONE_ABBR` TimezoneAbbr, + /// `TIMEZONE_HOUR` TimezoneHour, + /// `TIMEZONE_MINUTE` TimezoneMinute, + /// `TIMEZONE_REGION` TimezoneRegion, + /// `NODATETIME` indicates no date/time part NoDateTime, /// Arbitrary abbreviation or custom date-time part. 
/// @@ -399,7 +501,7 @@ impl fmt::Display for DateTimeField { } } -#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// The Unicode Standard defines four normalization forms, which are intended to eliminate @@ -491,14 +593,18 @@ impl fmt::Display for EscapeQuotedString<'_> { } } +/// Return a helper which formats `string` for inclusion inside a quoted +/// literal that uses `quote` as the delimiter. pub fn escape_quoted_string(string: &str, quote: char) -> EscapeQuotedString<'_> { EscapeQuotedString { string, quote } } +/// Convenience wrapper for escaping strings for single-quoted literals (`'`). pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> { escape_quoted_string(s, '\'') } +/// Convenience wrapper for escaping strings for double-quoted literals (`"`). pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> { escape_quoted_string(s, '\"') } @@ -533,6 +639,8 @@ impl fmt::Display for EscapeEscapedStringLiteral<'_> { } } +/// Return a helper which escapes characters for string literals that use +/// PostgreSQL-style escaped string literals (e.g. `E'...'`). pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> { EscapeEscapedStringLiteral(s) } @@ -568,16 +676,24 @@ impl fmt::Display for EscapeUnicodeStringLiteral<'_> { } } +/// Return a helper which escapes non-ASCII characters using `\XXXX` or +/// `\+XXXXXX` Unicode escape formats (used for `U&'...'` style literals). pub fn escape_unicode_string(s: &str) -> EscapeUnicodeStringLiteral<'_> { EscapeUnicodeStringLiteral(s) } +/// The side on which `TRIM` should be applied. +/// +/// Corresponds to `TRIM(BOTH|LEADING|TRAILING)` SQL syntax. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TrimWhereField { + /// `BOTH` (trim from both ends) Both, + /// `LEADING` (trim from start) Leading, + /// `TRAILING` (trim from end) Trailing, } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 328f925f7a..30673dfa03 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -17,7 +17,7 @@ //! Recursive visitors for ast Nodes. See [`Visitor`] for more details. -use crate::ast::{Expr, ObjectName, Query, Statement, TableFactor, Value}; +use crate::ast::{Expr, ObjectName, Query, Select, Statement, TableFactor, ValueWithSpan}; use core::ops::ControlFlow; /// A type that can be visited by a [`Visitor`]. See [`Visitor`] for @@ -32,6 +32,10 @@ use core::ops::ControlFlow; /// #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// ``` pub trait Visit { + /// Visit this node with the provided [`Visitor`]. + /// + /// Implementations should call the appropriate visitor hooks to traverse + /// child nodes and return a `ControlFlow` value to allow early exit. fn visit(&self, visitor: &mut V) -> ControlFlow; } @@ -47,6 +51,11 @@ pub trait Visit { /// #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// ``` pub trait VisitMut { + /// Mutably visit this node with the provided [`VisitorMut`]. + /// + /// Implementations should call the appropriate mutable visitor hooks to + /// traverse and allow in-place mutation of child nodes. Returning a + /// `ControlFlow` value permits early termination of the traversal. 
fn visit(&mut self, visitor: &mut V) -> ControlFlow; } @@ -198,6 +207,16 @@ pub trait Visitor { ControlFlow::Continue(()) } + /// Invoked for any [Select] that appear in the AST before visiting children + fn pre_visit_select(&mut self, _select: &Select) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any [Select] that appear in the AST after visiting children + fn post_visit_select(&mut self, _select: &Select) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any relations (e.g. tables) that appear in the AST before visiting children fn pre_visit_relation(&mut self, _relation: &ObjectName) -> ControlFlow { ControlFlow::Continue(()) @@ -239,12 +258,12 @@ pub trait Visitor { } /// Invoked for any Value that appear in the AST before visiting children - fn pre_visit_value(&mut self, _value: &Value) -> ControlFlow { + fn pre_visit_value(&mut self, _value: &ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } /// Invoked for any Value that appear in the AST after visiting children - fn post_visit_value(&mut self, _value: &Value) -> ControlFlow { + fn post_visit_value(&mut self, _value: &ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } } @@ -310,6 +329,16 @@ pub trait VisitorMut { ControlFlow::Continue(()) } + /// Invoked for any [Select] that appear in the AST before visiting children + fn pre_visit_select(&mut self, _select: &mut Select) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any [Select] that appear in the AST after visiting children + fn post_visit_select(&mut self, _select: &mut Select) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any relations (e.g. 
tables) that appear in the AST before visiting children fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow { ControlFlow::Continue(()) @@ -357,12 +386,12 @@ pub trait VisitorMut { } /// Invoked for any value that appear in the AST before visiting children - fn pre_visit_value(&mut self, _value: &mut Value) -> ControlFlow { + fn pre_visit_value(&mut self, _value: &mut ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } /// Invoked for any statements that appear in the AST after visiting children - fn post_visit_value(&mut self, _value: &mut Value) -> ControlFlow { + fn post_visit_value(&mut self, _value: &mut ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } } @@ -700,6 +729,16 @@ mod tests { ControlFlow::Continue(()) } + fn pre_visit_select(&mut self, select: &Select) -> ControlFlow { + self.visited.push(format!("PRE: SELECT: {select}")); + ControlFlow::Continue(()) + } + + fn post_visit_select(&mut self, select: &Select) -> ControlFlow { + self.visited.push(format!("POST: SELECT: {select}")); + ControlFlow::Continue(()) + } + fn pre_visit_relation(&mut self, relation: &ObjectName) -> ControlFlow { self.visited.push(format!("PRE: RELATION: {relation}")); ControlFlow::Continue(()) @@ -770,10 +809,12 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM table_name AS my_table", "PRE: QUERY: SELECT * FROM table_name AS my_table", + "PRE: SELECT: SELECT * FROM table_name AS my_table", "PRE: TABLE FACTOR: table_name AS my_table", "PRE: RELATION: table_name", "POST: RELATION: table_name", "POST: TABLE FACTOR: table_name AS my_table", + "POST: SELECT: SELECT * FROM table_name AS my_table", "POST: QUERY: SELECT * FROM table_name AS my_table", "POST: STATEMENT: SELECT * FROM table_name AS my_table", ], @@ -783,6 +824,7 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "PRE: QUERY: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", + "PRE: SELECT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "PRE: 
TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", @@ -797,6 +839,7 @@ mod tests { "PRE: EXPR: t2.t1_id", "POST: EXPR: t2.t1_id", "POST: EXPR: t1.id = t2.t1_id", + "POST: SELECT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "POST: QUERY: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "POST: STATEMENT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", ], @@ -806,20 +849,24 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT column FROM t2", + "PRE: SELECT: SELECT column FROM t2", "PRE: EXPR: column", "POST: EXPR: column", "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", "POST: TABLE FACTOR: t2", + "POST: SELECT: SELECT column FROM t2", "POST: QUERY: SELECT column FROM t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "POST: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -829,20 +876,24 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT column FROM t2", + "PRE: SELECT: SELECT column FROM t2", "PRE: EXPR: column", "POST: EXPR: column", "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", "POST: TABLE FACTOR: t2", + "POST: SELECT: SELECT column FROM t2", "POST: QUERY: SELECT column FROM 
t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "POST: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -852,24 +903,30 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", "PRE: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", + "PRE: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT column FROM t2", + "PRE: SELECT: SELECT column FROM t2", "PRE: EXPR: column", "POST: EXPR: column", "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", "POST: TABLE FACTOR: t2", + "POST: SELECT: SELECT column FROM t2", "POST: QUERY: SELECT column FROM t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "POST: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: SELECT: SELECT * FROM t3", "PRE: TABLE FACTOR: t3", "PRE: RELATION: t3", "POST: RELATION: t3", "POST: TABLE FACTOR: t3", + "POST: SELECT: SELECT * FROM t3", "POST: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", ], @@ -883,6 +940,7 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d) ORDER BY EMPID", "PRE: QUERY: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d) ORDER BY EMPID", + "PRE: SELECT: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d)", "PRE: TABLE FACTOR: monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 
'MAR', 'APR')) AS p (c, d)", "PRE: TABLE FACTOR: monthly_sales", "PRE: RELATION: monthly_sales", @@ -903,6 +961,7 @@ mod tests { "PRE: EXPR: 'APR'", "POST: EXPR: 'APR'", "POST: TABLE FACTOR: monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d)", + "POST: SELECT: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d)", "PRE: EXPR: EMPID", "POST: EXPR: EMPID", "POST: QUERY: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d) ORDER BY EMPID", @@ -956,7 +1015,7 @@ mod tests { #[cfg(test)] mod visit_mut_tests { - use crate::ast::{Statement, Value, VisitMut, VisitorMut}; + use crate::ast::{Statement, Value, ValueWithSpan, VisitMut, VisitorMut}; use crate::dialect::GenericDialect; use crate::parser::Parser; use crate::tokenizer::Tokenizer; @@ -970,13 +1029,13 @@ mod visit_mut_tests { impl VisitorMut for MutatorVisitor { type Break = (); - fn pre_visit_value(&mut self, value: &mut Value) -> ControlFlow { + fn pre_visit_value(&mut self, value: &mut ValueWithSpan) -> ControlFlow { self.index += 1; - *value = Value::SingleQuotedString(format!("REDACTED_{}", self.index)); + value.value = Value::SingleQuotedString(format!("REDACTED_{}", self.index)); ControlFlow::Continue(()) } - fn post_visit_value(&mut self, _value: &mut Value) -> ControlFlow { + fn post_visit_value(&mut self, _value: &mut ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } } diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index ec3c095be5..89c8a9ea24 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -18,7 +18,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [ANSI SQL](https://en.wikipedia.org/wiki/SQL:2011). 
-#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct AnsiDialect {} impl Dialect for AnsiDialect { diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 27fd3cca3b..8fca515182 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -42,7 +42,8 @@ const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ ]; /// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BigQueryDialect; impl Dialect for BigQueryDialect { @@ -136,7 +137,7 @@ impl Dialect for BigQueryDialect { } // See - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { true } @@ -156,4 +157,13 @@ impl Dialect for BigQueryDialect { fn supports_create_table_multi_schema_info_sources(&self) -> bool { true } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index bdac1f57b5..87c762f0bf 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -17,8 +17,9 @@ use crate::dialect::Dialect; -// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). -#[derive(Debug)] +/// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). 
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct ClickHouseDialect {} impl Dialect for ClickHouseDialect { @@ -100,4 +101,48 @@ impl Dialect for ClickHouseDialect { fn supports_nested_comments(&self) -> bool { true } + + /// See + fn supports_optimize_table(&self) -> bool { + true + } + + /// See + fn supports_prewhere(&self) -> bool { + true + } + + /// See + fn supports_with_fill(&self) -> bool { + true + } + + /// See + fn supports_limit_by(&self) -> bool { + true + } + + /// See + fn supports_interpolate(&self) -> bool { + true + } + + /// See + fn supports_settings(&self) -> bool { + true + } + + /// See + fn supports_select_format(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 18ee3e0aaf..57b84dbb9f 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -22,7 +22,8 @@ use crate::tokenizer::Token; /// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/) /// /// See . 
-#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DatabricksDialect; impl Dialect for DatabricksDialect { @@ -62,6 +63,11 @@ impl Dialect for DatabricksDialect { true } + /// + fn supports_table_versioning(&self) -> bool { + true + } + fn supports_lambda_functions(&self) -> bool { true } @@ -94,4 +100,19 @@ impl Dialect for DatabricksDialect { fn supports_group_by_with_modifier(&self) -> bool { true } + + /// See + fn supports_values_as_table_factor(&self) -> bool { + true + } + + /// See + fn supports_optimize_table(&self) -> bool { + true + } + + /// See + fn supports_bang_not_operator(&self) -> bool { + true + } } diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index f08d827b94..e70efd6954 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -18,7 +18,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [DuckDB](https://duckdb.org/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DuckDbDialect; // In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. 
@@ -43,6 +44,10 @@ impl Dialect for DuckDbDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + fn supports_named_fn_args_with_eq_operator(&self) -> bool { true } @@ -109,4 +114,23 @@ impl Dialect for DuckDbDialect { fn supports_notnull_operator(&self) -> bool { true } + + /// See + fn supports_install(&self) -> bool { + true + } + + /// See + fn supports_detach(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index dffc5b5276..1d5461fec1 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -19,7 +19,8 @@ use crate::dialect::Dialect; /// A permissive, general purpose [`Dialect`], which parses a wide variety of SQL /// statements, from many different dialects. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct GenericDialect; impl Dialect for GenericDialect { @@ -104,6 +105,22 @@ impl Dialect for GenericDialect { true } + fn supports_extract_comma_syntax(&self) -> bool { + true + } + + fn supports_create_view_comment_syntax(&self) -> bool { + true + } + + fn supports_parens_around_table_factor(&self) -> bool { + true + } + + fn supports_values_as_table_factor(&self) -> bool { + true + } + fn supports_create_index_with_clause(&self) -> bool { true } @@ -132,6 +149,10 @@ impl Dialect for GenericDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + fn supports_comment_on(&self) -> bool { true } @@ -156,6 +177,10 @@ impl Dialect for GenericDialect { true } + fn supports_multiline_comment_hints(&self) -> bool { + true + } + fn supports_user_host_grantee(&self) -> bool { true } @@ -195,4 +220,72 @@ impl Dialect for GenericDialect { fn supports_interval_options(&self) -> bool 
{ true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } + + fn supports_select_wildcard_replace(&self) -> bool { + true + } + + fn supports_select_wildcard_ilike(&self) -> bool { + true + } + + fn supports_select_wildcard_rename(&self) -> bool { + true + } + + fn supports_optimize_table(&self) -> bool { + true + } + + fn supports_install(&self) -> bool { + true + } + + fn supports_detach(&self) -> bool { + true + } + + fn supports_prewhere(&self) -> bool { + true + } + + fn supports_with_fill(&self) -> bool { + true + } + + fn supports_limit_by(&self) -> bool { + true + } + + fn supports_interpolate(&self) -> bool { + true + } + + fn supports_settings(&self) -> bool { + true + } + + fn supports_select_format(&self) -> bool { + true + } + + fn supports_comment_optimizer_hint(&self) -> bool { + true + } + + fn supports_constraint_keyword_without_name(&self) -> bool { + true + } + + fn supports_key_column_option(&self) -> bool { + true + } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 3e15d395b1..b39232ad52 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -18,7 +18,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [Hive](https://hive.apache.org/). 
-#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct HiveDialect {} impl Dialect for HiveDialect { diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b42f76bdcd..69df493c7c 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -49,8 +49,85 @@ pub use self::mysql::MySqlDialect; pub use self::oracle::OracleDialect; pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; +pub use self::snowflake::parse_snowflake_stage_name; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; + +/// Macro for streamlining the creation of derived `Dialect` objects. +/// The generated struct includes `new()` and `default()` constructors. +/// Requires the `derive-dialect` feature. +/// +/// # Syntax +/// +/// ```text +/// derive_dialect!(NewDialect, BaseDialect); +/// derive_dialect!(NewDialect, BaseDialect, overrides = { method = value, ... }); +/// derive_dialect!(NewDialect, BaseDialect, preserve_type_id = true); +/// derive_dialect!(NewDialect, BaseDialect, preserve_type_id = true, overrides = { ... }); +/// ``` +/// +/// # Example +/// +/// ``` +/// use sqlparser::derive_dialect; +/// use sqlparser::dialect::{Dialect, GenericDialect}; +/// +/// // Override boolean methods (supports_*, allow_*, etc.) 
+/// derive_dialect!(CustomDialect, GenericDialect, overrides = { +/// supports_order_by_all = true, +/// supports_nested_comments = true, +/// }); +/// +/// let dialect = CustomDialect::new(); +/// assert!(dialect.supports_order_by_all()); +/// assert!(dialect.supports_nested_comments()); +/// ``` +/// +/// # Overriding `identifier_quote_style` +/// +/// Use a char literal or `None`: +/// ``` +/// use sqlparser::derive_dialect; +/// use sqlparser::dialect::{Dialect, PostgreSqlDialect}; +/// +/// derive_dialect!(BacktickPostgreSqlDialect, PostgreSqlDialect, +/// preserve_type_id = true, +/// overrides = { identifier_quote_style = '`' } +/// ); +/// let d: &dyn Dialect = &BacktickPostgreSqlDialect::new(); +/// assert_eq!(d.identifier_quote_style("foo"), Some('`')); +/// +/// derive_dialect!(QuotelessPostgreSqlDialect, PostgreSqlDialect, +/// preserve_type_id = true, +/// overrides = { identifier_quote_style = None } +/// ); +/// let d: &dyn Dialect = &QuotelessPostgreSqlDialect::new(); +/// assert_eq!(d.identifier_quote_style("foo"), None); +/// ``` +/// +/// # Type Identity +/// +/// By default, derived dialects have their own `TypeId`. 
Set `preserve_type_id = true` to +/// retain the base dialect's identity with respect to the parser's `dialect.is::<T>()` checks: +/// ``` +/// use sqlparser::derive_dialect; +/// use sqlparser::dialect::{Dialect, GenericDialect}; +/// +/// derive_dialect!(EnhancedGenericDialect, GenericDialect, +/// preserve_type_id = true, +/// overrides = { +/// supports_order_by_all = true, +/// supports_nested_comments = true, +/// } +/// ); +/// let d: &dyn Dialect = &EnhancedGenericDialect::new(); +/// assert!(d.is::<GenericDialect>()); // still recognized as a GenericDialect +/// assert!(d.supports_nested_comments()); +/// assert!(d.supports_order_by_all()); +/// ``` +#[cfg(feature = "derive-dialect")] +pub use sqlparser_derive::derive_dialect; + use crate::ast::{ColumnOption, Expr, GranteesType, Ident, ObjectNamePart, Statement}; pub use crate::keywords; use crate::keywords::Keyword; @@ -62,14 +139,14 @@ use alloc::boxed::Box; /// Convenience check if a [`Parser`] uses a certain dialect. /// -/// Note: when possible please the new style, adding a method to the [`Dialect`] -/// trait rather than using this macro. +/// Note: when possible, please use the new style, adding a method to +/// the [`Dialect`] trait rather than using this macro. /// /// The benefits of adding a method on `Dialect` over this macro are: /// 1. user defined [`Dialect`]s can customize the parsing behavior /// 2. The differences between dialects can be clearly documented in the trait /// -/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates +/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates /// to `true` if `parser.dialect` is one of the [`Dialect`]s specified. macro_rules! dialect_of { ( $parsed_dialect: ident is $($dialect_type: ty)|+ ) => { @@ -123,9 +200,8 @@ macro_rules! dialect_is { pub trait Dialect: Debug + Any { /// Determine the [`TypeId`] of this dialect. /// - /// By default, return the same [`TypeId`] as [`Any::type_id`].
Can be overridden - /// by dialects that behave like other dialects - /// (for example when wrapping a dialect). + /// By default, return the same [`TypeId`] as [`Any::type_id`]. Can be overridden by + /// dialects that behave like other dialects (for example, when wrapping a dialect). fn dialect(&self) -> TypeId { self.type_id() } @@ -489,6 +565,19 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports concatenating string literals with a newline. + /// For example, the following statement would return `true`: + /// ```sql + /// SELECT 'abc' in ( + /// 'a' + /// 'b' + /// 'c' + /// ); + /// ``` + fn supports_string_literal_concatenation_with_newline(&self) -> bool { + false + } + /// Does the dialect support trailing commas in the projection list? fn supports_projection_trailing_commas(&self) -> bool { self.supports_trailing_commas() @@ -603,13 +692,26 @@ pub trait Dialect: Debug + Any { false } - /// Return true if the dialect supports specifying multiple options + /// Returns true if the dialect supports specifying multiple options /// in a `CREATE TABLE` statement for the structure of the new table. For example: /// `CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a` fn supports_create_table_multi_schema_info_sources(&self) -> bool { false } + /// Returns true if the dialect supports MySQL-specific SELECT modifiers + /// like `HIGH_PRIORITY`, `STRAIGHT_JOIN`, `SQL_SMALL_RESULT`, etc. + /// + /// For example: + /// ```sql + /// SELECT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT * FROM t1 JOIN t2 ON ... + /// ``` + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/select.html) + fn supports_select_modifiers(&self) -> bool { + false + } + /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression. 
@@ -654,17 +756,17 @@ pub trait Dialect: Debug + Any { }; } - let token = parser.peek_token(); + let token = parser.peek_token_ref(); debug!("get_next_precedence_full() {token:?}"); - match token.token { + match &token.token { Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)), Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)), Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)), Token::Word(w) if w.keyword == Keyword::AT => { match ( - parser.peek_nth_token(1).token, - parser.peek_nth_token(2).token, + &parser.peek_nth_token_ref(1).token, + &parser.peek_nth_token_ref(2).token, ) { (Token::Word(w), Token::Word(w2)) if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => @@ -675,28 +777,30 @@ pub trait Dialect: Debug + Any { } } - Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token { - // The precedence of NOT varies depending on keyword that - // follows it. If it is followed by IN, BETWEEN, or LIKE, - // it takes on the precedence of those tokens. Otherwise, it - // is not an infix operator, and therefore has zero - // precedence. - Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)), - Token::Word(w) - if w.keyword == Keyword::NULL && !parser.in_column_definition_state() => - { - Ok(p!(Is)) + Token::Word(w) if w.keyword == Keyword::NOT => { + match &parser.peek_nth_token_ref(1).token { + // The precedence of NOT varies depending on keyword that + // follows it. 
If it is followed by IN, BETWEEN, or LIKE, + // it takes on the precedence of those tokens. Otherwise, it + // is not an infix operator, and therefore has zero + // precedence. + Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)), + Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)), + Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)), + Token::Word(w) + if w.keyword == Keyword::NULL && !parser.in_column_definition_state() => + { + Ok(p!(Is)) + } + _ => Ok(self.prec_unknown()), } - _ => Ok(self.prec_unknown()), - }, + } Token::Word(w) if w.keyword == Keyword::NOTNULL && self.supports_notnull_operator() => { Ok(p!(Is)) } @@ -759,6 +863,13 @@ pub trait Dialect: Debug + Any { Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => { Ok(p!(DoubleColon)) } + Token::Colon => match &parser.peek_nth_token_ref(1).token { + // When colon is followed by a string or a number, it's usually in MAP syntax. + Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()), + // In other cases, it's used in semi-structured data traversal like in variant or JSON + // string columns. See `JsonAccess`. 
+ _ => Ok(p!(Colon)), + }, Token::Arrow | Token::LongArrow | Token::HashArrow @@ -812,6 +923,7 @@ pub trait Dialect: Debug + Any { Precedence::Ampersand => 23, Precedence::Caret => 22, Precedence::Pipe => 21, + Precedence::Colon => 21, Precedence::Between => 20, Precedence::Eq => 20, Precedence::Like => 19, @@ -848,6 +960,87 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect supports the `EXTRACT` function + /// with a comma separator instead of `FROM`. + /// + /// Example: + /// ```sql + /// SELECT EXTRACT(YEAR, date_column) FROM table; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions/extract) + fn supports_extract_comma_syntax(&self) -> bool { + false + } + + /// Returns true if this dialect supports a subquery passed to a function + /// as the only argument without enclosing parentheses. + /// + /// Example: + /// ```sql + /// SELECT FLATTEN(SELECT * FROM tbl); + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions/flatten) + fn supports_subquery_as_function_arg(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `COMMENT` clause in + /// `CREATE VIEW` statements using the `COMMENT = 'comment'` syntax. + /// + /// Example: + /// ```sql + /// CREATE VIEW v COMMENT = 'my comment' AS SELECT 1; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/create-view#optional-parameters) + fn supports_create_view_comment_syntax(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `ARRAY` type without + /// specifying an element type. 
+ /// + /// Example: + /// ```sql + /// CREATE TABLE t (a ARRAY); + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/data-types-semistructured#array) + fn supports_array_typedef_without_element_type(&self) -> bool { + false + } + + /// Returns true if this dialect supports extra parentheses around + /// lone table names or derived tables in the `FROM` clause. + /// + /// Example: + /// ```sql + /// SELECT * FROM (mytable); + /// SELECT * FROM ((SELECT 1)); + /// SELECT * FROM (mytable) AS alias; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/from) + fn supports_parens_around_table_factor(&self) -> bool { + false + } + + /// Returns true if this dialect supports `VALUES` as a table factor + /// without requiring parentheses around the entire clause. + /// + /// Example: + /// ```sql + /// SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2); + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/values) + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-values.html) + fn supports_values_as_table_factor(&self) -> bool { + false + } + /// Returns true if this dialect allows dollar placeholders /// e.g. `SELECT $var` (SQLite) fn supports_dollar_placeholder(&self) -> bool { @@ -881,10 +1074,14 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports `EXPLAIN` statements with utility options + /// e.g. `EXPLAIN (ANALYZE TRUE, BUFFERS TRUE) SELECT * FROM tbl;` fn supports_explain_with_utility_options(&self) -> bool { false } + /// Returns true if the dialect supports `ASC` and `DESC` in column definitions + /// e.g. `CREATE TABLE t (a INT ASC, b INT DESC);` fn supports_asc_desc_in_column_definition(&self) -> bool { false } @@ -894,12 +1091,23 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports `<<` and `>>` shift operators. 
+ fn supports_bitwise_shift_operators(&self) -> bool { + false + } + /// Returns true if the dialect supports nested comments /// e.g. `/* /* nested */ */` fn supports_nested_comments(&self) -> bool { false } + /// Returns true if the dialect supports optimizer hints in multiline comments + /// e.g. `/*!50110 KEY_BLOCK_SIZE = 1024*/` + fn supports_multiline_comment_hints(&self) -> bool { + false + } + /// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem` /// as an alias assignment operator, rather than a boolean expression. /// For example: the following statements are equivalent for such a dialect: @@ -970,6 +1178,35 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports the `CONSTRAINT` keyword without a name + /// in table constraint definitions. + /// + /// Example: + /// ```sql + /// CREATE TABLE t (a INT, CONSTRAINT CHECK (a > 0)) + /// ``` + /// + /// This is a MySQL extension; the SQL standard requires a name after `CONSTRAINT`. + /// When the name is omitted, the output normalizes to just the constraint type + /// without the `CONSTRAINT` keyword (e.g., `CHECK (a > 0)`). + /// + /// + fn supports_constraint_keyword_without_name(&self) -> bool { + false + } + + /// Returns true if the dialect supports the `KEY` keyword as part of + /// column-level constraints in a `CREATE TABLE` statement. + /// + /// When enabled, the parser accepts these MySQL-specific column options: + /// - `UNIQUE [KEY]` — optional `KEY` after `UNIQUE` + /// - `[PRIMARY] KEY` — standalone `KEY` as shorthand for `PRIMARY KEY` + /// + /// + fn supports_key_column_option(&self) -> bool { + false + } + /// Returns true if the dialect supports writing `[*]` to select all elements in a JSON array. fn supports_semi_structured_array_all_elements(&self) -> bool { false @@ -1015,11 +1252,23 @@ pub trait Dialect: Debug + Any { false } + /// Does the dialect support table queries in insertion? + /// + /// e.g. 
`SELECT INTO () ...` + fn supports_insert_table_query(&self) -> bool { + false + } + /// Does the dialect support insert formats, e.g. `INSERT INTO ... FORMAT ` fn supports_insert_format(&self) -> bool { false } + /// Returns true if this dialect supports `INSERT INTO t [[AS] alias] ...`. + fn supports_insert_table_alias(&self) -> bool { + false + } + /// Returns true if this dialect supports `SET` statements without an explicit /// assignment operator such as `=`. For example: `SET SHOWPLAN_XML ON`. fn supports_set_stmt_without_operator(&self) -> bool { @@ -1060,7 +1309,7 @@ pub trait Dialect: Debug + Any { /// Returns true if this dialect supports querying historical table data /// by specifying which version of the data to query. - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { false } @@ -1118,6 +1367,13 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports space-separated column options + /// in a `CREATE TABLE` statement. For example: + /// ```sql + /// CREATE TABLE tbl ( + /// col INT NOT NULL DEFAULT 0 + /// ); + /// ``` fn supports_space_separated_column_options(&self) -> bool { false } @@ -1214,33 +1470,253 @@ pub trait Dialect: Debug + Any { fn supports_semantic_view_table_factor(&self) -> bool { false } + + /// Support quote delimited string literals, e.g. `Q'{...}'` + /// + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + fn supports_quote_delimited_string(&self) -> bool { + false + } + + /// Returns `true` if the dialect supports query optimizer hints in the + /// format of single and multi line comments immediately following a + /// `SELECT`, `INSERT`, `REPLACE`, `DELETE`, or `MERGE` keyword. 
+ /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Comments.html#SQLRF-GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + fn supports_comment_optimizer_hint(&self) -> bool { + false + } + + /// Returns true if the dialect considers the `&&` operator as a boolean AND operator. + fn supports_double_ampersand_operator(&self) -> bool { + false + } + + /// Returns true if the dialect supports casting an expression to a binary type + /// using the `BINARY ` syntax. + fn supports_binary_kw_as_cast(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `REPLACE` option in a + /// `SELECT *` wildcard expression. + /// + /// Example: + /// ```sql + /// SELECT * REPLACE (col1 AS col1_alias) FROM table; + /// ``` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace) + /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select#replace) + /// [DuckDB](https://duckdb.org/docs/sql/query_syntax/select#replace-clause) + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/select#parameters) + fn supports_select_wildcard_replace(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `ILIKE` option in a + /// `SELECT *` wildcard expression. + /// + /// Example: + /// ```sql + /// SELECT * ILIKE '%pattern%' FROM table; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/select#parameters) + fn supports_select_wildcard_ilike(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `RENAME` option in a + /// `SELECT *` wildcard expression. 
+ /// + /// Example: + /// ```sql + /// SELECT * RENAME col1 AS col1_alias FROM table; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/select#parameters) + fn supports_select_wildcard_rename(&self) -> bool { + false + } + + /// Returns true if this dialect supports aliasing a wildcard select item. + /// + /// Example: + /// ```sql + /// SELECT t.* AS alias FROM t + /// ``` + /// + /// [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_SELECT_list.html) + fn supports_select_wildcard_with_alias(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `OPTIMIZE TABLE` statement. + /// + /// Example: + /// ```sql + /// OPTIMIZE TABLE table_name; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) + fn supports_optimize_table(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `INSTALL` statement. + /// + /// Example: + /// ```sql + /// INSTALL extension_name; + /// ``` + /// + /// [DuckDB](https://duckdb.org/docs/extensions/overview) + fn supports_install(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `DETACH` statement. + /// + /// Example: + /// ```sql + /// DETACH DATABASE db_name; + /// ``` + /// + /// [DuckDB](https://duckdb.org/docs/sql/statements/attach#detach-syntax) + fn supports_detach(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `PREWHERE` clause + /// in `SELECT` statements. + /// + /// Example: + /// ```sql + /// SELECT * FROM table PREWHERE col > 0 WHERE col < 100; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/prewhere) + fn supports_prewhere(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `WITH FILL` clause + /// in `ORDER BY` expressions. 
+ /// + /// Example: + /// ```sql + /// SELECT * FROM table ORDER BY col WITH FILL FROM 1 TO 10 STEP 1; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill-modifier) + fn supports_with_fill(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `LIMIT BY` clause. + /// + /// Example: + /// ```sql + /// SELECT * FROM table LIMIT 10 BY col; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/limit-by) + fn supports_limit_by(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `INTERPOLATE` clause + /// in `ORDER BY` expressions. + /// + /// Example: + /// ```sql + /// SELECT * FROM table ORDER BY col WITH FILL INTERPOLATE (col2 AS col2 + 1); + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill-modifier) + fn supports_interpolate(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `SETTINGS` clause. + /// + /// Example: + /// ```sql + /// SELECT * FROM table SETTINGS max_threads = 4; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query) + fn supports_settings(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `FORMAT` clause in `SELECT` statements. + /// + /// Example: + /// ```sql + /// SELECT * FROM table FORMAT JSON; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format) + fn supports_select_format(&self) -> bool { + false + } + + /// Returns true if the dialect supports the two-argument comma-separated + /// form of the `TRIM` function: `TRIM(expr, characters)`. 
+ fn supports_comma_separated_trim(&self) -> bool { + false + } } -/// This represents the operators for which precedence must be defined +/// Operators for which precedence must be defined. /// -/// higher number -> higher precedence +/// Higher number -> higher precedence. +/// See expression parsing for how these values are used. #[derive(Debug, Clone, Copy)] pub enum Precedence { + /// Member access operator `.` (highest precedence). Period, + /// Postgres style type cast `::`. DoubleColon, + /// Timezone operator (e.g. `AT TIME ZONE`). AtTz, + /// Multiplication / Division / Modulo operators (`*`, `/`, `%`). MulDivModOp, + /// Addition / Subtraction (`+`, `-`). PlusMinus, + /// Bitwise `XOR` operator (`^`). Xor, + /// Bitwise `AND` operator (`&`). Ampersand, + /// Bitwise `CARET` (^) for some dialects. Caret, + /// Bitwise `OR` / pipe operator (`|`). Pipe, + /// `:` operator for json/variant access. + Colon, + /// `BETWEEN` operator. Between, + /// Equality operator (`=`). Eq, + /// Pattern matching (`LIKE`). Like, + /// `IS` operator (e.g. `IS NULL`). Is, + /// Other Postgres-specific operators. PgOther, + /// Unary `NOT`. UnaryNot, + /// Logical `AND`. And, + /// Logical `OR` (lowest precedence). Or, } impl dyn Dialect { + /// Returns true if `self` is the concrete dialect `T`. 
#[inline] pub fn is(&self) -> bool { // borrowed from `Any` implementation @@ -1331,6 +1807,27 @@ mod tests { dialect_from_str(v).unwrap() } + #[test] + #[cfg(feature = "derive-dialect")] + fn test_dialect_override() { + derive_dialect!(EnhancedGenericDialect, GenericDialect, + preserve_type_id = true, + overrides = { + supports_order_by_all = true, + supports_nested_comments = true, + supports_triple_quoted_string = true, + }, + ); + let dialect = EnhancedGenericDialect::new(); + + assert!(dialect.supports_order_by_all()); + assert!(dialect.supports_nested_comments()); + assert!(dialect.supports_triple_quoted_string()); + + let d: &dyn Dialect = &dialect; + assert!(d.is::()); + } + #[test] fn identifier_quote_style() { let tests: Vec<(&dyn Dialect, &str, Option)> = vec![ diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index e1902b3896..8ad765dd33 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -21,16 +21,15 @@ use crate::ast::{ GranteesType, IfStatement, Statement, }; use crate::dialect::Dialect; -use crate::keywords::{self, Keyword}; +use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; #[cfg(not(feature = "std"))] use alloc::{vec, vec::Vec}; -const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[Keyword::IF, Keyword::ELSE]; - /// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) -#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MsSqlDialect {} impl Dialect for MsSqlDialect { @@ -109,7 +108,7 @@ impl Dialect for MsSqlDialect { } /// See: - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { true } @@ -128,12 +127,98 @@ impl Dialect for MsSqlDialect { &[GranteesType::Public] } - fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { - 
!keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) && !RESERVED_FOR_COLUMN_ALIAS.contains(kw) + fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // List of keywords that cannot be used as select item (column) aliases in MSSQL + // regardless of whether the alias is explicit or implicit. + // + // These are T-SQL statement-starting keywords; allowing them as implicit aliases + // causes the parser to consume the keyword as an alias for the previous expression, + // then fail on the token that follows (e.g. `TABLE`, `@var`, `sp_name`, …). + Keyword::IF + | Keyword::ELSE + | Keyword::DECLARE + | Keyword::EXEC + | Keyword::EXECUTE + | Keyword::INSERT + | Keyword::UPDATE + | Keyword::DELETE + | Keyword::DROP + | Keyword::CREATE + | Keyword::ALTER + | Keyword::TRUNCATE + | Keyword::PRINT + | Keyword::WHILE + | Keyword::RETURN + | Keyword::THROW + | Keyword::RAISERROR + | Keyword::MERGE => false, + _ => explicit || self.is_column_alias(kw, parser), + } + } + + fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // List of keywords that cannot be used as table aliases in MSSQL + // regardless of whether the alias is explicit or implicit. + // + // These are T-SQL statement-starting keywords. Without blocking them here, + // a bare `SELECT * FROM t` followed by a newline and one of these keywords + // would cause the parser to consume the keyword as a table alias for `t`, + // then fail on the token that follows (e.g. `@var`, `sp_name`, `TABLE`, …). + // + // `SET` is already covered by the global `RESERVED_FOR_TABLE_ALIAS` list; + // the keywords below are MSSQL-specific additions. 
+ Keyword::IF + | Keyword::ELSE + | Keyword::DECLARE + | Keyword::EXEC + | Keyword::EXECUTE + | Keyword::INSERT + | Keyword::UPDATE + | Keyword::DELETE + | Keyword::DROP + | Keyword::CREATE + | Keyword::ALTER + | Keyword::TRUNCATE + | Keyword::PRINT + | Keyword::WHILE + | Keyword::RETURN + | Keyword::THROW + | Keyword::RAISERROR + | Keyword::MERGE => false, + _ => explicit || self.is_table_alias(kw, parser), + } } fn parse_statement(&self, parser: &mut Parser) -> Option> { - if parser.peek_keyword(Keyword::IF) { + if parser.parse_keyword(Keyword::BEGIN) { + // Check if this is a BEGIN...END block rather than BEGIN TRANSACTION + let is_block = parser + .maybe_parse(|p| { + if p.parse_transaction_modifier().is_some() + || p.parse_one_of_keywords(&[ + Keyword::TRANSACTION, + Keyword::WORK, + Keyword::TRAN, + ]) + .is_some() + || matches!(p.peek_token_ref().token, Token::SemiColon | Token::EOF) + { + p.expected_ref("statement", p.peek_token_ref()) + } else { + Ok(()) + } + }) + .unwrap_or(None) + .is_some(); + if is_block { + Some(parser.parse_begin_exception_end()) + } else { + parser.prev_token(); + None + } + } else if parser.peek_keyword(Keyword::IF) { Some(self.parse_if_stmt(parser)) } else if parser.parse_keywords(&[Keyword::CREATE, Keyword::TRIGGER]) { Some(self.parse_create_trigger(parser, false)) @@ -148,6 +233,15 @@ impl Dialect for MsSqlDialect { None } } + + fn get_next_precedence(&self, parser: &Parser) -> Option> { + let token = parser.peek_token_ref(); + match &token.token { + // lowest prec to prevent it from turning into a binary op + Token::Colon => Some(Ok(self.prec_unknown())), + _ => None, + } + } } impl MsSqlDialect { diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 8d2a5ad4bd..6b057539e5 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -35,7 +35,8 @@ const RESERVED_FOR_TABLE_ALIAS_MYSQL: &[Keyword] = &[ ]; /// A [`Dialect`] for [MySQL](https://www.mysql.com/) -#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy, 
PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MySqlDialect {} impl Dialect for MySqlDialect { @@ -84,6 +85,15 @@ impl Dialect for MySqlDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + + /// see + fn supports_multiline_comment_hints(&self) -> bool { + true + } + fn parse_infix( &self, parser: &mut crate::parser::Parser, @@ -92,10 +102,15 @@ impl Dialect for MySqlDialect { ) -> Option> { // Parse DIV as an operator if parser.parse_keyword(Keyword::DIV) { + let left = Box::new(expr.clone()); + let right = Box::new(match parser.parse_expr() { + Ok(expr) => expr, + Err(e) => return Some(Err(e)), + }); Some(Ok(Expr::BinaryOp { - left: Box::new(expr.clone()), + left, op: BinaryOperator::MyIntegerDivide, - right: Box::new(parser.parse_expr().unwrap()), + right, })) } else { None @@ -152,6 +167,10 @@ impl Dialect for MySqlDialect { true } + fn supports_select_modifiers(&self) -> bool { + true + } + fn supports_set_names(&self) -> bool { true } @@ -167,6 +186,31 @@ impl Dialect for MySqlDialect { fn supports_cross_join_constraint(&self) -> bool { true } + + /// See: + fn supports_double_ampersand_operator(&self) -> bool { + true + } + + /// Deprecated functionality by MySQL but still supported + /// See: + fn supports_binary_kw_as_cast(&self) -> bool { + true + } + + fn supports_comment_optimizer_hint(&self) -> bool { + true + } + + /// See: + fn supports_constraint_keyword_without_name(&self) -> bool { + true + } + + /// See: + fn supports_key_column_option(&self) -> bool { + true + } } /// `LOCK TABLES` @@ -205,7 +249,7 @@ fn parse_lock_tables_type(parser: &mut Parser) -> Result Option> { + let t = parser.peek_token_ref(); + debug!("get_next_precedence() {t:?}"); + + match &t.token { + Token::StringConcat => Some(Ok(self.prec_value(Precedence::PlusMinus))), + _ => None, + } + } + fn supports_group_by_expr(&self) -> bool { true } + + fn 
get_reserved_keywords_for_select_item_operator(&self) -> &[Keyword] { + &RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR + } + + fn supports_quote_delimited_string(&self) -> bool { + true + } + + fn supports_comment_optimizer_hint(&self) -> bool { + true + } + + fn supports_insert_table_alias(&self) -> bool { + true + } + + /// See + fn supports_insert_table_query(&self) -> bool { + true + } } diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index e861cc5153..b99a8b5c3d 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -33,8 +33,11 @@ use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; +use super::keywords::RESERVED_FOR_IDENTIFIER; + /// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) -#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct PostgreSqlDialect {} const PERIOD_PREC: u8 = 200; @@ -80,6 +83,14 @@ impl Dialect for PostgreSqlDialect { true } + fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { + if matches!(kw, Keyword::INTERVAL) { + false + } else { + RESERVED_FOR_IDENTIFIER.contains(&kw) + } + } + /// See fn is_custom_operator_part(&self, ch: char) -> bool { matches!( @@ -104,12 +115,12 @@ impl Dialect for PostgreSqlDialect { } fn get_next_precedence(&self, parser: &Parser) -> Option> { - let token = parser.peek_token(); + let token = parser.peek_token_ref(); debug!("get_next_precedence() {token:?}"); // we only return some custom value here when the behaviour (not merely the numeric value) differs // from the default implementation - match token.token { + match &token.token { Token::Word(w) if w.keyword == Keyword::COLLATE && !parser.in_column_definition_state() => { @@ -136,6 +147,8 @@ impl Dialect for PostgreSqlDialect { | Token::ShiftRight | Token::ShiftLeft | Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)), 
+ // lowest prec to prevent it from turning into a binary op + Token::Colon => Some(Ok(self.prec_unknown())), _ => None, } } @@ -159,6 +172,7 @@ impl Dialect for PostgreSqlDialect { Precedence::Ampersand => PG_OTHER_PREC, Precedence::Caret => CARET_PREC, Precedence::Pipe => PG_OTHER_PREC, + Precedence::Colon => PG_OTHER_PREC, Precedence::Between => BETWEEN_LIKE_PREC, Precedence::Eq => EQ_PREC, Precedence::Like => BETWEEN_LIKE_PREC, @@ -199,6 +213,10 @@ impl Dialect for PostgreSqlDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + /// see fn supports_comment_on(&self) -> bool { true @@ -280,4 +298,16 @@ impl Dialect for PostgreSqlDialect { fn supports_interval_options(&self) -> bool { true } + + fn supports_insert_table_alias(&self) -> bool { + true + } + + fn supports_create_table_like_parenthesized(&self) -> bool { + true + } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 1cd6098a6c..5969ee55e6 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -22,7 +22,8 @@ use core::str::Chars; use super::PostgreSqlDialect; /// A [`Dialect`] for [RedShift](https://aws.amazon.com/redshift/) -#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct RedshiftSqlDialect {} // In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. 
@@ -120,6 +121,10 @@ impl Dialect for RedshiftSqlDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + fn supports_array_typedef_with_brackets(&self) -> bool { true } @@ -136,6 +141,10 @@ impl Dialect for RedshiftSqlDialect { true } + fn supports_select_wildcard_with_alias(&self) -> bool { + true + } + fn supports_select_exclude(&self) -> bool { true } @@ -143,4 +152,8 @@ impl Dialect for RedshiftSqlDialect { fn supports_create_table_like_parenthesized(&self) -> bool { true } + + fn supports_string_literal_concatenation_with_newline(&self) -> bool { + true + } } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 4cfaddceb3..1ac21d0073 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -28,16 +28,19 @@ use crate::ast::helpers::stmt_data_loading::{ }; use crate::ast::{ AlterTable, AlterTableOperation, AlterTableType, CatalogSyncNamespaceMode, ColumnOption, - ColumnPolicy, ColumnPolicyProperty, ContactEntry, CopyIntoSnowflakeKind, CreateTableLikeKind, - DollarQuotedString, Ident, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, - IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, ObjectName, ObjectNamePart, - RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, - StorageSerializationPolicy, TagsColumnOption, Value, WrappedCollection, + ColumnPolicy, ColumnPolicyProperty, ContactEntry, CopyIntoSnowflakeKind, CreateTable, + CreateTableLikeKind, DollarQuotedString, Ident, IdentityParameters, IdentityProperty, + IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, + Insert, MultiTableInsertIntoClause, MultiTableInsertType, MultiTableInsertValue, + MultiTableInsertValues, MultiTableInsertWhenClause, ObjectName, ObjectNamePart, + RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, StorageLifecyclePolicy, + StorageSerializationPolicy, TableObject, TagsColumnOption, Value, WrappedCollection, }; use 
crate::dialect::{Dialect, Precedence}; use crate::keywords::Keyword; use crate::parser::{IsOptional, Parser, ParserError}; -use crate::tokenizer::Token; +use crate::tokenizer::TokenWithSpan; +use crate::tokenizer::{Span, Token}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; #[cfg(not(feature = "std"))] @@ -127,7 +130,8 @@ const RESERVED_KEYWORDS_FOR_TABLE_FACTOR: &[Keyword] = &[ ]; /// A [`Dialect`] for [Snowflake](https://www.snowflake.com/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SnowflakeDialect; impl Dialect for SnowflakeDialect { @@ -211,8 +215,49 @@ impl Dialect for SnowflakeDialect { true } + /// See [doc](https://docs.snowflake.com/en/sql-reference/functions/extract) + fn supports_extract_comma_syntax(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/functions/flatten) + fn supports_subquery_as_function_arg(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/create-view#optional-parameters) + fn supports_create_view_comment_syntax(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/data-types-semistructured#array) + fn supports_array_typedef_without_element_type(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/constructs/from) + fn supports_parens_around_table_factor(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/constructs/values) + fn supports_values_as_table_factor(&self) -> bool { + true + } + fn parse_statement(&self, parser: &mut Parser) -> Option> { if parser.parse_keyword(Keyword::BEGIN) { + // Snowflake supports both `BEGIN TRANSACTION` and `BEGIN ... END` blocks. + // If the next keyword indicates a transaction statement, let the + // standard parse_begin() handle it. 
+ if parser + .peek_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK, Keyword::NAME]) + .is_some() + || matches!(parser.peek_token_ref().token, Token::SemiColon | Token::EOF) + { + parser.prev_token(); + return None; + } return Some(parser.parse_begin_exception_end()); } @@ -221,12 +266,17 @@ impl Dialect for SnowflakeDialect { return Some(parse_alter_dynamic_table(parser)); } + if parser.parse_keywords(&[Keyword::ALTER, Keyword::EXTERNAL, Keyword::TABLE]) { + // ALTER EXTERNAL TABLE + return Some(parse_alter_external_table(parser)); + } + if parser.parse_keywords(&[Keyword::ALTER, Keyword::SESSION]) { // ALTER SESSION let set = match parser.parse_one_of_keywords(&[Keyword::SET, Keyword::UNSET]) { Some(Keyword::SET) => true, Some(Keyword::UNSET) => false, - _ => return Some(parser.expected("SET or UNSET", parser.peek_token())), + _ => return Some(parser.expected_ref("SET or UNSET", parser.peek_token_ref())), }; return Some(parse_alter_session(parser, set)); } @@ -267,9 +317,13 @@ impl Dialect for SnowflakeDialect { // OK - this is CREATE STAGE statement return Some(parse_create_stage(or_replace, temporary, parser)); } else if parser.parse_keyword(Keyword::TABLE) { - return Some(parse_create_table( - or_replace, global, temporary, volatile, transient, iceberg, dynamic, parser, - )); + return Some( + parse_create_table( + or_replace, global, temporary, volatile, transient, iceberg, dynamic, + parser, + ) + .map(Into::into), + ); } else if parser.parse_keyword(Keyword::DATABASE) { return Some(parse_create_database(or_replace, transient, parser)); } else { @@ -313,6 +367,33 @@ impl Dialect for SnowflakeDialect { parser.prev_token(); } + // Check for multi-table INSERT + // `INSERT [OVERWRITE] ALL ... 
or INSERT [OVERWRITE] FIRST ...` + if parser.parse_keyword(Keyword::INSERT) { + let insert_token = parser.get_current_token().clone(); + let overwrite = parser.parse_keyword(Keyword::OVERWRITE); + + // Check for ALL or FIRST keyword + if let Some(kw) = parser.parse_one_of_keywords(&[Keyword::ALL, Keyword::FIRST]) { + let multi_table_insert_type = match kw { + Keyword::FIRST => MultiTableInsertType::First, + _ => MultiTableInsertType::All, + }; + return Some(parse_multi_table_insert( + parser, + insert_token, + overwrite, + multi_table_insert_type, + )); + } + + // Not a multi-table insert, rewind + if overwrite { + parser.prev_token(); // rewind OVERWRITE + } + parser.prev_token(); // rewind INSERT + } + None } @@ -347,9 +428,9 @@ impl Dialect for SnowflakeDialect { } fn get_next_precedence(&self, parser: &Parser) -> Option> { - let token = parser.peek_token(); + let token = parser.peek_token_ref(); // Snowflake supports the `:` cast operator unlike other dialects - match token.token { + match &token.token { Token::Colon => Some(Ok(self.prec_value(Precedence::DoubleColon))), _ => None, } @@ -531,7 +612,7 @@ impl Dialect for SnowflakeDialect { } /// See: - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { true } @@ -577,6 +658,30 @@ impl Dialect for SnowflakeDialect { fn supports_semantic_view_table_factor(&self) -> bool { true } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_ilike(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_rename(&self) -> bool { + true + } + + /// See + fn supports_lambda_functions(&self) -> bool { + true + } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } // Peeks ahead to identify tokens that are expected after @@ -619,15 +724,15 @@ fn parse_alter_dynamic_table(parser: &mut Parser) -> Result Result +fn parse_alter_external_table(parser: &mut Parser) -> Result { + let if_exists = 
parser.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = parser.parse_object_name(true)?; + + // Parse the operation (REFRESH for now) + let operation = if parser.parse_keyword(Keyword::REFRESH) { + // Optional subpath for refreshing specific partitions + let subpath = match parser.peek_token().token { + Token::SingleQuotedString(s) => { + parser.next_token(); + Some(s) + } + _ => None, + }; + AlterTableOperation::Refresh { subpath } + } else { + return parser.expected_ref( + "REFRESH after ALTER EXTERNAL TABLE", + parser.peek_token_ref(), + ); + }; + + let end_token = if parser.peek_token_ref().token == Token::SemiColon { + parser.peek_token_ref().clone() + } else { + parser.get_current_token().clone() + }; + + Ok(Statement::AlterTable(AlterTable { + name: table_name, + if_exists, + only: false, + operations: vec![operation], + location: None, + on_cluster: None, + table_type: Some(AlterTableType::External), + end_token: AttachedToken(end_token), + })) +} + /// Parse snowflake alter session. 
/// fn parse_alter_session(parser: &mut Parser, set: bool) -> Result { @@ -675,7 +822,7 @@ pub fn parse_create_table( iceberg: bool, dynamic: bool, parser: &mut Parser, -) -> Result { +) -> Result { let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = parser.parse_object_name(false)?; @@ -770,6 +917,7 @@ pub fn parse_create_table( Keyword::WITH => { parser.expect_one_of_keywords(&[ Keyword::AGGREGATION, + Keyword::STORAGE, Keyword::TAG, Keyword::ROW, ])?; @@ -791,6 +939,19 @@ pub fn parse_create_table( builder = builder.with_row_access_policy(Some(RowAccessPolicy::new(policy, columns))) } + Keyword::STORAGE => { + parser.expect_keywords(&[Keyword::LIFECYCLE, Keyword::POLICY])?; + let policy = parser.parse_object_name(false)?; + parser.expect_keyword_is(Keyword::ON)?; + parser.expect_token(&Token::LParen)?; + let columns = parser.parse_comma_separated(|p| p.parse_identifier())?; + parser.expect_token(&Token::RParen)?; + + builder = builder.with_storage_lifecycle_policy(Some(StorageLifecyclePolicy { + policy, + on: columns, + })) + } Keyword::TAG => { parser.expect_token(&Token::LParen)?; let tags = parser.parse_comma_separated(Parser::parse_tag)?; @@ -1101,7 +1262,7 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result { + Token::LParen | Token::RParen => { parser.prev_token(); break; } @@ -1111,14 +1272,18 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result ident.push('/'), Token::Plus => ident.push('+'), Token::Minus => ident.push('-'), + Token::Eq => ident.push('='), + Token::Colon => ident.push(':'), Token::Number(n, _) => ident.push_str(n), Token::Word(w) => ident.push_str(&w.to_string()), - _ => return parser.expected("stage name identifier", parser.peek_token()), + _ => return parser.expected_ref("stage name identifier", parser.peek_token_ref()), } } Ok(Ident::new(ident)) } +/// Parses a Snowflake stage name, which may start with `@` for internal stages. 
+/// Examples: `@mystage`, `@namespace.stage`, `schema.table` pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result { match parser.next_token().token { Token::AtSign => { @@ -1142,7 +1307,7 @@ pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result` /// and `COPY INTO ` which have different syntax. pub fn parse_copy_into(parser: &mut Parser) -> Result { - let kind = match parser.peek_token().token { + let kind = match &parser.peek_token_ref().token { // Indicates an internal stage Token::AtSign => CopyIntoSnowflakeKind::Location, // Indicates an external stage, i.e. s3://, gcs:// or azure:// @@ -1215,7 +1380,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { from_stage_alias = if parser.parse_keyword(Keyword::AS) { Some(match parser.next_token().token { Token::Word(w) => Ok(Ident::new(w.value)), - _ => parser.expected("stage alias", parser.peek_token()), + _ => parser.expected_ref("stage alias", parser.peek_token_ref()), }?) } else { None @@ -1273,7 +1438,10 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { // In `COPY INTO ` the copy options do not have a shared key // like in `COPY INTO
` Token::Word(key) => copy_options.push(parser.parse_key_value_option(&key)?), - _ => return parser.expected("another copy option, ; or EOF'", parser.peek_token()), + _ => { + return parser + .expected_ref("another copy option, ; or EOF'", parser.peek_token_ref()) + } } } } @@ -1368,7 +1536,7 @@ fn parse_select_item_for_data_load( // parse element element = Some(Ident::new(match parser.next_token().token { Token::Word(w) => Ok(w.value), - _ => parser.expected("file_col_num", parser.peek_token()), + _ => parser.expected_ref("file_col_num", parser.peek_token_ref()), }?)); } _ => { @@ -1381,7 +1549,7 @@ fn parse_select_item_for_data_load( if parser.parse_keyword(Keyword::AS) { item_as = Some(match parser.next_token().token { Token::Word(w) => Ok(Ident::new(w.value)), - _ => parser.expected("column item alias", parser.peek_token()), + _ => parser.expected_ref("column item alias", parser.peek_token_ref()), }?); } @@ -1409,7 +1577,7 @@ fn parse_stage_params(parser: &mut Parser) -> Result Ok(word), - _ => parser.expected("a URL statement", parser.peek_token()), + _ => parser.expected_ref("a URL statement", parser.peek_token_ref()), }?) } @@ -1424,7 +1592,7 @@ fn parse_stage_params(parser: &mut Parser) -> Result Ok(word), - _ => parser.expected("an endpoint statement", parser.peek_token()), + _ => parser.expected_ref("an endpoint statement", parser.peek_token_ref()), }?) 
} @@ -1466,8 +1634,8 @@ fn parse_session_options( let mut options: Vec = Vec::new(); let empty = String::new; loop { - let next_token = parser.peek_token(); - match next_token.token { + let peeked_token = parser.peek_token(); + match peeked_token.token { Token::SemiColon | Token::EOF => break, Token::Comma => { parser.advance_token(); @@ -1481,12 +1649,17 @@ fn parse_session_options( } else { options.push(KeyValueOption { option_name: key.value, - option_value: KeyValueOptionKind::Single(Value::Placeholder(empty())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder(empty()).with_span(Span { + start: peeked_token.span.end, + end: peeked_token.span.end, + }), + ), }); } } _ => { - return parser.expected("another option or end of statement", next_token); + return parser.expected("another option or end of statement", peeked_token); } } } @@ -1584,3 +1757,171 @@ fn parse_show_objects(terse: bool, parser: &mut Parser) -> Result +/// +/// -- Conditional multi-table insert +/// INSERT [ OVERWRITE ] { FIRST | ALL } +/// { WHEN THEN intoClause [ ... ] } +/// [ ... 
] +/// [ ELSE intoClause ] +/// +/// ``` +/// +/// See: +fn parse_multi_table_insert( + parser: &mut Parser, + insert_token: TokenWithSpan, + overwrite: bool, + multi_table_insert_type: MultiTableInsertType, +) -> Result { + // Check if this is conditional (has WHEN clauses) or unconditional (direct INTO clauses) + let is_conditional = parser.peek_keyword(Keyword::WHEN); + + let (multi_table_into_clauses, multi_table_when_clauses, multi_table_else_clause) = + if is_conditional { + // Conditional multi-table insert: WHEN clauses + let (when_clauses, else_clause) = parse_multi_table_insert_when_clauses(parser)?; + (vec![], when_clauses, else_clause) + } else { + // Unconditional multi-table insert: direct INTO clauses + let into_clauses = parse_multi_table_insert_into_clauses(parser)?; + (into_clauses, vec![], None) + }; + + // Parse the source query + let source = parser.parse_query()?; + + Ok(Statement::Insert(Insert { + insert_token: insert_token.into(), + optimizer_hints: vec![], + or: None, + ignore: false, + into: false, + table: TableObject::TableName(ObjectName(vec![])), // Not used for multi-table insert + table_alias: None, + columns: vec![], + overwrite, + source: Some(source), + assignments: vec![], + partitioned: None, + after_columns: vec![], + has_table_keyword: false, + on: None, + returning: None, + output: None, + replace_into: false, + priority: None, + insert_alias: None, + settings: None, + format_clause: None, + multi_table_insert_type: Some(multi_table_insert_type), + multi_table_into_clauses, + multi_table_when_clauses, + multi_table_else_clause, + })) +} + +/// Parse one or more INTO clauses for multi-table INSERT. 
+fn parse_multi_table_insert_into_clauses( + parser: &mut Parser, +) -> Result, ParserError> { + let mut into_clauses = vec![]; + while parser.parse_keyword(Keyword::INTO) { + into_clauses.push(parse_multi_table_insert_into_clause(parser)?); + } + if into_clauses.is_empty() { + return parser.expected_ref("INTO clause in multi-table INSERT", parser.peek_token_ref()); + } + Ok(into_clauses) +} + +/// Parse a single INTO clause for multi-table INSERT. +/// +/// Syntax: `INTO
[ ( ) ] [ VALUES ( ) ]` +fn parse_multi_table_insert_into_clause( + parser: &mut Parser, +) -> Result { + let table_name = parser.parse_object_name(false)?; + + // Parse optional column list: ( [, ...] ) + let columns = parser + .maybe_parse(|p| p.parse_parenthesized_column_list(IsOptional::Mandatory, false))? + .unwrap_or_default(); + + // Parse optional VALUES clause + let values = if parser.parse_keyword(Keyword::VALUES) { + parser.expect_token(&Token::LParen)?; + let values = parser.parse_comma_separated(parse_multi_table_insert_value)?; + parser.expect_token(&Token::RParen)?; + Some(MultiTableInsertValues { values }) + } else { + None + }; + + Ok(MultiTableInsertIntoClause { + table_name, + columns, + values, + }) +} + +/// Parse a single value in a multi-table INSERT VALUES clause. +fn parse_multi_table_insert_value( + parser: &mut Parser, +) -> Result { + if parser.parse_keyword(Keyword::DEFAULT) { + Ok(MultiTableInsertValue::Default) + } else { + Ok(MultiTableInsertValue::Expr(parser.parse_expr()?)) + } +} + +/// Parse WHEN clauses for conditional multi-table INSERT. 
+fn parse_multi_table_insert_when_clauses( + parser: &mut Parser, +) -> Result< + ( + Vec, + Option>, + ), + ParserError, +> { + let mut when_clauses = vec![]; + let mut else_clause = None; + + // Parse WHEN clauses + while parser.parse_keyword(Keyword::WHEN) { + let condition = parser.parse_expr()?; + parser.expect_keyword(Keyword::THEN)?; + + // Parse INTO clauses for this WHEN + let into_clauses = parse_multi_table_insert_into_clauses(parser)?; + + when_clauses.push(MultiTableInsertWhenClause { + condition, + into_clauses, + }); + } + + // Parse optional ELSE clause + if parser.parse_keyword(Keyword::ELSE) { + else_clause = Some(parse_multi_table_insert_into_clauses(parser)?); + } + + if when_clauses.is_empty() { + return parser.expected_ref( + "at least one WHEN clause in conditional multi-table INSERT", + parser.peek_token_ref(), + ); + } + + Ok((when_clauses, else_clause)) +} diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index ba4cb6173a..39ee622d88 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -30,7 +30,8 @@ use crate::parser::{Parser, ParserError}; /// [`CREATE TABLE`](https://sqlite.org/lang_createtable.html) statement with no /// type specified, as in `CREATE TABLE t1 (a)`. In the AST, these columns will /// have the data type [`Unspecified`](crate::ast::DataType::Unspecified). 
-#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SQLiteDialect {} impl Dialect for SQLiteDialect { @@ -88,7 +89,10 @@ impl Dialect for SQLiteDialect { ] { if parser.parse_keyword(keyword) { let left = Box::new(expr.clone()); - let right = Box::new(parser.parse_expr().unwrap()); + let right = Box::new(match parser.parse_expr() { + Ok(expr) => expr, + Err(e) => return Some(Err(e)), + }); return Some(Ok(Expr::BinaryOp { left, op, right })); } } @@ -116,4 +120,8 @@ impl Dialect for SQLiteDialect { fn supports_notnull_operator(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/keywords.rs b/src/keywords.rs index 827df1cee6..f0f37b1c02 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -37,6 +37,7 @@ use sqlparser_derive::{Visit, VisitMut}; /// expands to `pub const SELECT = "SELECT";` macro_rules! kw_def { ($ident:ident = $string_keyword:expr) => { + #[doc = concat!("The `", $string_keyword, "` SQL keyword.")] pub const $ident: &'static str = $string_keyword; }; ($ident:ident) => { @@ -54,19 +55,35 @@ macro_rules! define_keywords { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[allow(non_camel_case_types)] + /// An enumeration of SQL keywords recognized by the parser. pub enum Keyword { + /// Represents no keyword. NoKeyword, - $($ident),* + $( + #[doc = concat!("The `", stringify!($ident), "` SQL keyword.")] + $ident + ),* } + /// Array of all `Keyword` enum values in declaration order. pub const ALL_KEYWORDS_INDEX: &[Keyword] = &[ $(Keyword::$ident),* ]; $(kw_def!($ident $(= $string_keyword)?);)* + /// Array of all SQL keywords as string constants. 
pub const ALL_KEYWORDS: &[&str] = &[ $($ident),* ]; + + impl core::fmt::Display for Keyword { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self { + Keyword::NoKeyword => write!(f, "NoKeyword"), + $(Keyword::$ident => write!(f, "{}", $ident),)* + } + } + } }; } @@ -128,6 +145,7 @@ define_keywords!( AVG, AVG_ROW_LENGTH, AVRO, + BACKUP, BACKWARD, BASE64, BASE_LOCATION, @@ -185,6 +203,7 @@ define_keywords!( CENTURY, CHAIN, CHANGE, + CHANGES, CHANGE_TRACKING, CHANNEL, CHAR, @@ -295,6 +314,7 @@ define_keywords!( DEFINE, DEFINED, DEFINER, + DELAY, DELAYED, DELAY_KEY_WRITE, DELEGATED, @@ -316,7 +336,10 @@ define_keywords!( DISCARD, DISCONNECT, DISTINCT, + DISTINCTROW, + DISTKEY, DISTRIBUTE, + DISTSTYLE, DIV, DO, DOMAIN, @@ -360,6 +383,7 @@ define_keywords!( ESCAPE, ESCAPED, ESTIMATE, + EVEN, EVENT, EVERY, EVOLVE, @@ -538,6 +562,7 @@ define_keywords!( KEY_BLOCK_SIZE, KILL, LAG, + LAMBDA, LANGUAGE, LARGE, LAST, @@ -550,6 +575,7 @@ define_keywords!( LEFT, LEFTARG, LEVEL, + LIFECYCLE, LIKE, LIKE_REGEX, LIMIT, @@ -628,6 +654,7 @@ define_keywords!( MODIFIES, MODIFY, MODULE, + MODULUS, MONITOR, MONTH, MONTHS, @@ -658,6 +685,7 @@ define_keywords!( NOCOMPRESS, NOCREATEDB, NOCREATEROLE, + NOCYCLE, NOINHERIT, NOLOGIN, NONE, @@ -828,6 +856,7 @@ define_keywords!( RELAY, RELEASE, RELEASES, + REMAINDER, REMOTE, REMOVE, REMOVEQUOTES, @@ -910,6 +939,7 @@ define_keywords!( SESSION_USER, SET, SETERROR, + SETOF, SETS, SETTINGS, SHARE, @@ -927,6 +957,7 @@ define_keywords!( SOME, SORT, SORTED, + SORTKEY, SOURCE, SPATIAL, SPECIFIC, @@ -936,6 +967,11 @@ define_keywords!( SQLEXCEPTION, SQLSTATE, SQLWARNING, + SQL_BIG_RESULT, + SQL_BUFFER_RESULT, + SQL_CALC_FOUND_ROWS, + SQL_NO_CACHE, + SQL_SMALL_RESULT, SQRT, SRID, STABLE, @@ -1002,6 +1038,7 @@ define_keywords!( TEXT, TEXTFILE, THEN, + THROW, TIES, TIME, TIMEFORMAT, @@ -1023,12 +1060,14 @@ define_keywords!( TOTP, TRACE, TRAILING, + TRAN, TRANSACTION, TRANSIENT, TRANSLATE, TRANSLATE_REGEX, TRANSLATION, TREAT, + TREE, 
TRIGGER, TRIM, TRIM_ARRAY, @@ -1108,6 +1147,7 @@ define_keywords!( VIRTUAL, VOLATILE, VOLUME, + WAITFOR, WAREHOUSE, WAREHOUSES, WEEK, @@ -1132,6 +1172,7 @@ define_keywords!( XOR, YEAR, YEARS, + YES, ZONE, ZORDER, ZSTD @@ -1178,6 +1219,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::ANTI, Keyword::SEMI, Keyword::RETURNING, + Keyword::OUTPUT, Keyword::ASOF, Keyword::MATCH_CONDITION, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) @@ -1232,15 +1274,16 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::CLUSTER, Keyword::DISTRIBUTE, Keyword::RETURNING, + Keyword::VALUES, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, Keyword::INTO, Keyword::END, ]; -// Global list of reserved keywords allowed after FROM. -// Parser should call Dialect::get_reserved_keyword_after_from -// to allow for each dialect to customize the list. +/// Global list of reserved keywords allowed after FROM. +/// Parser should call Dialect::get_reserved_keyword_after_from +/// to allow for each dialect to customize the list. pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[ Keyword::INTO, Keyword::LIMIT, diff --git a/src/lib.rs b/src/lib.rs index c2fe50794d..e68d7f93eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -153,8 +153,8 @@ // Splitting complex nodes (expressions, statements, types) into separate types // would bloat the API and hide intent. Extra memory is a worthwhile tradeoff. #![allow(clippy::large_enum_variant)] -// TODO: Fix and remove this. -#![expect(clippy::unnecessary_unwrap)] +#![forbid(clippy::unreachable)] +#![forbid(missing_docs)] // Allow proc-macros to find this crate extern crate self as sqlparser; @@ -168,7 +168,11 @@ extern crate pretty_assertions; pub mod ast; #[macro_use] +/// Submodules for SQL dialects. 
pub mod dialect; + +#[cfg(feature = "derive-dialect")] +pub use dialect::derive_dialect; mod display_utils; pub mod keywords; pub mod parser; diff --git a/src/parser/alter.rs b/src/parser/alter.rs index b3e3c99e64..4000eb26ba 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -19,10 +19,10 @@ use super::{Parser, ParserError}; use crate::{ ast::{ helpers::key_value_options::{KeyValueOptions, KeyValueOptionsDelimiter}, - AlterConnectorOwner, AlterPolicyOperation, AlterRoleOperation, AlterUser, + AlterConnectorOwner, AlterPolicy, AlterPolicyOperation, AlterRoleOperation, AlterUser, AlterUserAddMfaMethodOtp, AlterUserAddRoleDelegation, AlterUserModifyMfaMethod, - AlterUserRemoveRoleDelegation, AlterUserSetPolicy, Expr, MfaMethodKind, Password, - ResetConfig, RoleOption, SetConfigValue, Statement, UserPolicyKind, + AlterUserPassword, AlterUserRemoveRoleDelegation, AlterUserSetPolicy, Expr, MfaMethodKind, + Password, ResetConfig, RoleOption, SetConfigValue, Statement, UserPolicyKind, }, dialect::{MsSqlDialect, PostgreSqlDialect}, keywords::Keyword, @@ -30,6 +30,7 @@ use crate::{ }; impl Parser<'_> { + /// Parse `ALTER ROLE` statement pub fn parse_alter_role(&mut self) -> Result { if dialect_of!(self is PostgreSqlDialect) { return self.parse_pg_alter_role(); @@ -53,7 +54,7 @@ impl Parser<'_> { /// ``` /// /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-alterpolicy.html) - pub fn parse_alter_policy(&mut self) -> Result { + pub fn parse_alter_policy(&mut self) -> Result { let name = self.parse_identifier()?; self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; @@ -61,7 +62,7 @@ impl Parser<'_> { if self.parse_keyword(Keyword::RENAME) { self.expect_keyword_is(Keyword::TO)?; let new_name = self.parse_identifier()?; - Ok(Statement::AlterPolicy { + Ok(AlterPolicy { name, table_name, operation: AlterPolicyOperation::Rename { new_name }, @@ -90,7 +91,7 @@ impl Parser<'_> { } else { None }; - Ok(Statement::AlterPolicy { + 
Ok(AlterPolicy { name, table_name, operation: AlterPolicyOperation::Apply { @@ -147,9 +148,10 @@ impl Parser<'_> { /// ```sql /// ALTER USER [ IF EXISTS ] [ ] [ OPTIONS ] /// ``` - pub fn parse_alter_user(&mut self) -> Result { + pub fn parse_alter_user(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let name = self.parse_identifier()?; + let _ = self.parse_keyword(Keyword::WITH); let rename_to = if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { Some(self.parse_identifier()?) } else { @@ -180,9 +182,9 @@ impl Parser<'_> { } else if self.parse_keyword(Keyword::AUTHORIZATIONS) { None } else { - return self.expected( + return self.expected_ref( "REMOVE DELEGATED AUTHORIZATION OF ROLE | REMOVE DELEGATED AUTHORIZATIONS", - self.peek_token(), + self.peek_token_ref(), ); }; self.expect_keywords(&[Keyword::FROM, Keyword::SECURITY, Keyword::INTEGRATION])?; @@ -217,7 +219,7 @@ impl Parser<'_> { if self.parse_keywords(&[Keyword::ADD, Keyword::MFA, Keyword::METHOD, Keyword::OTP]) { let count = if self.parse_keyword(Keyword::COUNT) { self.expect_token(&Token::Eq)?; - Some(self.parse_value()?.into()) + Some(self.parse_value()?) } else { None }; @@ -292,7 +294,22 @@ impl Parser<'_> { vec![] }; - Ok(Statement::AlterUser(AlterUser { + let encrypted = self.parse_keyword(Keyword::ENCRYPTED); + let password = if self.parse_keyword(Keyword::PASSWORD) { + let password = if self.parse_keyword(Keyword::NULL) { + None + } else { + Some(self.parse_literal_string()?) 
+ }; + Some(AlterUserPassword { + encrypted, + password, + }) + } else { + None + }; + + Ok(AlterUser { if_exists, name, rename_to, @@ -311,7 +328,8 @@ impl Parser<'_> { unset_tag, set_props, unset_props, - })) + password, + }) } fn parse_mfa_method(&mut self) -> Result { @@ -322,7 +340,7 @@ impl Parser<'_> { } else if self.parse_keyword(Keyword::DUO) { Ok(MfaMethodKind::Duo) } else { - self.expected("PASSKEY, TOTP or DUO", self.peek_token()) + self.expected_ref("PASSKEY, TOTP or DUO", self.peek_token_ref()) } } @@ -340,10 +358,10 @@ impl Parser<'_> { let role_name = self.parse_identifier()?; AlterRoleOperation::RenameRole { role_name } } else { - return self.expected("= after WITH NAME ", self.peek_token()); + return self.expected_ref("= after WITH NAME ", self.peek_token_ref()); } } else { - return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); + return self.expected_ref("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token_ref()); }; Ok(Statement::AlterRole { @@ -367,7 +385,7 @@ impl Parser<'_> { let role_name = self.parse_identifier()?; AlterRoleOperation::RenameRole { role_name } } else { - return self.expected("TO after RENAME", self.peek_token()); + return self.expected_ref("TO after RENAME", self.peek_token_ref()); } // SET } else if self.parse_keyword(Keyword::SET) { @@ -394,10 +412,10 @@ impl Parser<'_> { in_database, } } else { - self.expected("config value", self.peek_token())? + self.expected_ref("config value", self.peek_token_ref())? } } else { - self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? + self.expected_ref("'TO' or '=' or 'FROM CURRENT'", self.peek_token_ref())? 
} // RESET } else if self.parse_keyword(Keyword::RESET) { @@ -424,7 +442,7 @@ impl Parser<'_> { } // check option if options.is_empty() { - return self.expected("option", self.peek_token())?; + return self.expected_ref("option", self.peek_token_ref())?; } AlterRoleOperation::WithOptions { options } @@ -486,7 +504,7 @@ impl Parser<'_> { self.expect_keyword_is(Keyword::UNTIL)?; RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) } - _ => self.expected("option", self.peek_token())?, + _ => self.expected_ref("option", self.peek_token_ref())?, }; Ok(option) diff --git a/src/parser/merge.rs b/src/parser/merge.rs new file mode 100644 index 0000000000..619be612bf --- /dev/null +++ b/src/parser/merge.rs @@ -0,0 +1,261 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
SQL Parser for a `MERGE` statement + +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, format, vec, vec::Vec}; + +use crate::{ + ast::{ + Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, + MergeUpdateExpr, ObjectName, OutputClause, SetExpr, + }, + dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, + keywords::Keyword, + parser::IsOptional, + tokenizer::TokenWithSpan, +}; + +use super::{Parser, ParserError}; + +impl Parser<'_> { + /// Parse a `MERGE` statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + pub(super) fn parse_merge_setexpr_boxed( + &mut self, + merge_token: TokenWithSpan, + ) -> Result, ParserError> { + Ok(Box::new(SetExpr::Merge( + self.parse_merge(merge_token)?.into(), + ))) + } + + /// Parse a `MERGE` statement + pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { + let optimizer_hints = self.maybe_parse_optimizer_hints()?; + let into = self.parse_keyword(Keyword::INTO); + + let table = self.parse_table_factor()?; + + self.expect_keyword_is(Keyword::USING)?; + let source = self.parse_table_factor()?; + self.expect_keyword_is(Keyword::ON)?; + let on = self.parse_expr()?; + let clauses = self.parse_merge_clauses()?; + let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { + Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), + None => None, + }; + + Ok(Merge { + merge_token: merge_token.into(), + optimizer_hints, + into, + table, + source, + on: Box::new(on), + clauses, + output, + }) + } + + fn parse_merge_clauses(&mut self) -> Result, ParserError> { + let mut clauses = vec![]; + loop { + if !(self.parse_keyword(Keyword::WHEN)) { + break; + } + let when_token = self.get_current_token().clone(); + + let mut clause_kind = MergeClauseKind::Matched; + if self.parse_keyword(Keyword::NOT) { + clause_kind = MergeClauseKind::NotMatched; + } + 
self.expect_keyword_is(Keyword::MATCHED)?; + + if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) + { + clause_kind = MergeClauseKind::NotMatchedBySource; + } else if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) + { + clause_kind = MergeClauseKind::NotMatchedByTarget; + } + + let predicate = if self.parse_keyword(Keyword::AND) { + Some(self.parse_expr()?) + } else { + None + }; + + self.expect_keyword_is(Keyword::THEN)?; + + let merge_clause = match self.parse_one_of_keywords(&[ + Keyword::UPDATE, + Keyword::INSERT, + Keyword::DELETE, + ]) { + Some(Keyword::UPDATE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + } + + let update_token = self.get_current_token().clone(); + self.expect_keyword_is(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let update_predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + let delete_predicate = if self.parse_keyword(Keyword::DELETE) { + let _ = self.expect_keyword(Keyword::WHERE)?; + Some(self.parse_expr()?) 
+ } else { + None + }; + MergeAction::Update(MergeUpdateExpr { + update_token: update_token.into(), + assignments, + update_predicate, + delete_predicate, + }) + } + Some(Keyword::DELETE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("DELETE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let delete_token = self.get_current_token().clone(); + MergeAction::Delete { + delete_token: delete_token.into(), + } + } + Some(Keyword::INSERT) => { + if !matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("INSERT is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let insert_token = self.get_current_token().clone(); + let is_mysql = dialect_of!(self is MySqlDialect); + + let columns = self.parse_merge_clause_insert_columns(is_mysql)?; + let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::ROW) + { + (MergeInsertKind::Row, self.get_current_token().clone()) + } else { + self.expect_keyword_is(Keyword::VALUES)?; + let values_token = self.get_current_token().clone(); + let values = self.parse_values(is_mysql, false)?; + (MergeInsertKind::Values(values), values_token) + }; + let insert_predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + MergeAction::Insert(MergeInsertExpr { + insert_token: insert_token.into(), + columns, + kind_token: kind_token.into(), + kind, + insert_predicate, + }) + } + _ => { + return parser_err!( + "expected UPDATE, DELETE or INSERT in merge clause", + self.peek_token_ref().span.start + ); + } + }; + clauses.push(MergeClause { + when_token: when_token.into(), + clause_kind, + predicate, + action: merge_clause, + }); + } + Ok(clauses) + } + + fn parse_merge_clause_insert_columns( + &mut self, + allow_empty: bool, + ) -> Result, ParserError> { + self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) + } + + /// Parses an `OUTPUT` clause if present (MSSQL). + pub(super) fn maybe_parse_output_clause( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::OUTPUT) { + Ok(Some(self.parse_output( + Keyword::OUTPUT, + self.get_current_token().clone(), + )?)) + } else { + Ok(None) + } + } + + pub(super) fn parse_output( + &mut self, + start_keyword: Keyword, + start_token: TokenWithSpan, + ) -> Result { + let select_items = self.parse_projection()?; + let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { + self.expect_keyword_is(Keyword::INTO)?; + Some(self.parse_select_into()?) 
+ } else { + None + }; + + Ok(if start_keyword == Keyword::OUTPUT { + OutputClause::Output { + output_token: start_token.into(), + select_items, + into_table, + } + } else { + OutputClause::Returning { + returning_token: start_token.into(), + select_items, + } + }) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1b0ae2e0ac..a88c119b28 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -32,25 +32,29 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::{ - key_value_options::{ - KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, +use crate::ast::*; +use crate::ast::{ + comments, + helpers::{ + key_value_options::{ + KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, + }, + stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, }, - stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, }; -use crate::ast::Statement::CreatePolicy; -use crate::ast::*; use crate::dialect::*; use crate::keywords::{Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; use sqlparser::parser::ParserState::ColumnDefinition; -mod alter; - +/// Errors produced by the SQL parser. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParserError { + /// Error originating from the tokenizer with a message. TokenizerError(String), + /// Generic parser error with a message. ParserError(String), + /// Raised when a recursion depth limit is exceeded. RecursionLimitExceeded, } @@ -61,6 +65,9 @@ macro_rules! parser_err { }; } +mod alter; +mod merge; + #[cfg(feature = "std")] /// Implementation [`RecursionCounter`] if std is available mod recursion { @@ -150,19 +157,29 @@ mod recursion { } #[derive(PartialEq, Eq)] +/// Indicates whether a parser element is optional or mandatory. pub enum IsOptional { + /// The element is optional. Optional, + /// The element is mandatory. Mandatory, } +/// Indicates if a table expression is lateral. 
pub enum IsLateral { + /// The expression is lateral. Lateral, + /// The expression is not lateral. NotLateral, } +/// Represents a wildcard expression used in SELECT lists. pub enum WildcardExpr { + /// A specific expression used instead of a wildcard. Expr(Expr), + /// A qualified wildcard like `table.*`. QualifiedWildcard(ObjectName), + /// An unqualified `*` wildcard. Wildcard, } @@ -186,8 +203,7 @@ impl fmt::Display for ParserError { } } -#[cfg(feature = "std")] -impl std::error::Error for ParserError {} +impl core::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; @@ -224,6 +240,7 @@ impl From for MatchedTrailingBracket { /// Options that control how the [`Parser`] parses SQL text #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { + /// Allow trailing commas in lists (e.g. `a, b,`). pub trailing_commas: bool, /// Controls how literal values are unescaped. See /// [`Tokenizer::with_unescape`] for more details. @@ -487,7 +504,7 @@ impl<'a> Parser<'a> { expecting_statement_delimiter = false; } - match self.peek_token().token { + match &self.peek_token_ref().token { Token::EOF => break, // end of statement @@ -500,7 +517,7 @@ impl<'a> Parser<'a> { } if expecting_statement_delimiter { - return self.expected("end of statement", self.peek_token()); + return self.expected_ref("end of statement", self.peek_token_ref()); } let statement = self.parse_statement()?; @@ -529,6 +546,44 @@ impl<'a> Parser<'a> { Parser::new(dialect).try_with_sql(sql)?.parse_statements() } + /// Parses the given `sql` into an Abstract Syntax Tree (AST), returning + /// also encountered source code comments. + /// + /// See [Parser::parse_sql]. 
+ pub fn parse_sql_with_comments( + dialect: &'a dyn Dialect, + sql: &str, + ) -> Result<(Vec, comments::Comments), ParserError> { + let mut p = Parser::new(dialect).try_with_sql(sql)?; + p.parse_statements().map(|stmts| (stmts, p.into_comments())) + } + + /// Consumes this parser returning comments from the parsed token stream. + fn into_comments(self) -> comments::Comments { + let mut comments = comments::Comments::default(); + for t in self.tokens.into_iter() { + match t.token { + Token::Whitespace(Whitespace::SingleLineComment { comment, prefix }) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::SingleLine { + content: comment, + prefix, + }, + span: t.span, + }); + } + Token::Whitespace(Whitespace::MultiLineComment(comment)) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::MultiLine(comment), + span: t.span, + }); + } + _ => {} + } + } + comments + } + /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.), /// stopping before the statement separator, if any. 
pub fn parse_statement(&mut self) -> Result { @@ -547,28 +602,28 @@ impl<'a> Parser<'a> { Keyword::DESC => self.parse_explain(DescribeAlias::Desc), Keyword::DESCRIBE => self.parse_explain(DescribeAlias::Describe), Keyword::EXPLAIN => self.parse_explain(DescribeAlias::Explain), - Keyword::ANALYZE => self.parse_analyze(), + Keyword::ANALYZE => self.parse_analyze().map(Into::into), Keyword::CASE => { self.prev_token(); - self.parse_case_stmt() + self.parse_case_stmt().map(Into::into) } Keyword::IF => { self.prev_token(); - self.parse_if_stmt() + self.parse_if_stmt().map(Into::into) } Keyword::WHILE => { self.prev_token(); - self.parse_while() + self.parse_while().map(Into::into) } Keyword::RAISE => { self.prev_token(); - self.parse_raise_stmt() + self.parse_raise_stmt().map(Into::into) } Keyword::SELECT | Keyword::WITH | Keyword::VALUES | Keyword::FROM => { self.prev_token(); - self.parse_query().map(Statement::Query) + self.parse_query().map(Into::into) } - Keyword::TRUNCATE => self.parse_truncate(), + Keyword::TRUNCATE => self.parse_truncate().map(Into::into), Keyword::ATTACH => { if dialect_of!(self is DuckDbDialect) { self.parse_attach_duckdb_database() @@ -576,10 +631,10 @@ impl<'a> Parser<'a> { self.parse_attach_database() } } - Keyword::DETACH if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Keyword::DETACH if self.dialect.supports_detach() => { self.parse_detach_duckdb_database() } - Keyword::MSCK => self.parse_msck(), + Keyword::MSCK => self.parse_msck().map(Into::into), Keyword::CREATE => self.parse_create(), Keyword::CACHE => self.parse_cache_table(), Keyword::DROP => self.parse_drop(), @@ -602,12 +657,12 @@ impl<'a> Parser<'a> { Keyword::SET => self.parse_set(), Keyword::SHOW => self.parse_show(), Keyword::USE => self.parse_use(), - Keyword::GRANT => self.parse_grant(), + Keyword::GRANT => self.parse_grant().map(Into::into), Keyword::DENY => { self.prev_token(); self.parse_deny() } - Keyword::REVOKE => self.parse_revoke(), + Keyword::REVOKE => 
self.parse_revoke().map(Into::into), Keyword::START => self.parse_start_transaction(), Keyword::BEGIN => self.parse_begin(), Keyword::END => self.parse_end(), @@ -615,6 +670,10 @@ impl<'a> Parser<'a> { Keyword::RELEASE => self.parse_release(), Keyword::COMMIT => self.parse_commit(), Keyword::RAISERROR => Ok(self.parse_raiserror()?), + Keyword::THROW => { + self.prev_token(); + self.parse_throw().map(Into::into) + } Keyword::ROLLBACK => self.parse_rollback(), Keyword::ASSERT => self.parse_assert(), // `PREPARE`, `EXECUTE` and `DEALLOCATE` are Postgres-specific @@ -622,7 +681,7 @@ impl<'a> Parser<'a> { Keyword::DEALLOCATE => self.parse_deallocate(), Keyword::EXECUTE | Keyword::EXEC => self.parse_execute(), Keyword::PREPARE => self.parse_prepare(), - Keyword::MERGE => self.parse_merge(next_token), + Keyword::MERGE => self.parse_merge(next_token).map(Into::into), // `LISTEN`, `UNLISTEN` and `NOTIFY` are Postgres-specific // syntaxes. They are used for Postgres statement. Keyword::LISTEN if self.dialect.supports_listen_notify() => self.parse_listen(), @@ -636,17 +695,20 @@ impl<'a> Parser<'a> { } Keyword::RENAME => self.parse_rename(), // `INSTALL` is duckdb specific https://duckdb.org/docs/extensions/overview - Keyword::INSTALL if dialect_of!(self is DuckDbDialect | GenericDialect) => { - self.parse_install() - } + Keyword::INSTALL if self.dialect.supports_install() => self.parse_install(), Keyword::LOAD => self.parse_load(), - // `OPTIMIZE` is clickhouse specific https://clickhouse.tech/docs/en/sql-reference/statements/optimize/ - Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Keyword::LOCK => { + self.prev_token(); + self.parse_lock_statement().map(Into::into) + } + Keyword::OPTIMIZE if self.dialect.supports_optimize_table() => { self.parse_optimize_table() } // `COMMENT` is snowflake specific https://docs.snowflake.com/en/sql-reference/sql/comment Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(), 
Keyword::PRINT => self.parse_print(), + // `WAITFOR` is MSSQL specific https://learn.microsoft.com/en-us/sql/t-sql/language-elements/waitfor-transact-sql + Keyword::WAITFOR => self.parse_waitfor(), Keyword::RETURN => self.parse_return(), Keyword::EXPORT => { self.prev_token(); @@ -656,12 +718,12 @@ impl<'a> Parser<'a> { self.prev_token(); self.parse_vacuum() } - Keyword::RESET => self.parse_reset(), + Keyword::RESET => self.parse_reset().map(Into::into), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { self.prev_token(); - self.parse_query().map(Statement::Query) + self.parse_query().map(Into::into) } _ => self.expected("an SQL statement", next_token), } @@ -670,7 +732,7 @@ impl<'a> Parser<'a> { /// Parse a `CASE` statement. /// /// See [Statement::Case] - pub fn parse_case_stmt(&mut self) -> Result { + pub fn parse_case_stmt(&mut self) -> Result { let case_token = self.expect_keyword(Keyword::CASE)?; let match_expr = if self.peek_keyword(Keyword::WHEN) { @@ -695,19 +757,19 @@ impl<'a> Parser<'a> { end_case_token = self.expect_keyword(Keyword::CASE)?; } - Ok(Statement::Case(CaseStatement { + Ok(CaseStatement { case_token: AttachedToken(case_token), match_expr, when_blocks, else_block, end_case_token: AttachedToken(end_case_token), - })) + }) } /// Parse an `IF` statement. /// /// See [Statement::If] - pub fn parse_if_stmt(&mut self) -> Result { + pub fn parse_if_stmt(&mut self) -> Result { self.expect_keyword_is(Keyword::IF)?; let if_block = self.parse_conditional_statement_block(&[ Keyword::ELSE, @@ -736,22 +798,22 @@ impl<'a> Parser<'a> { self.expect_keyword_is(Keyword::END)?; let end_token = self.expect_keyword(Keyword::IF)?; - Ok(Statement::If(IfStatement { + Ok(IfStatement { if_block, elseif_blocks, else_block, end_token: Some(AttachedToken(end_token)), - })) + }) } /// Parse a `WHILE` statement. 
/// /// See [Statement::While] - fn parse_while(&mut self) -> Result { + fn parse_while(&mut self) -> Result { self.expect_keyword_is(Keyword::WHILE)?; let while_block = self.parse_conditional_statement_block(&[Keyword::END])?; - Ok(Statement::While(WhileStatement { while_block })) + Ok(WhileStatement { while_block }) } /// Parses an expression and associated list of statements @@ -818,7 +880,7 @@ impl<'a> Parser<'a> { /// Parse a `RAISE` statement. /// /// See [Statement::Raise] - pub fn parse_raise_stmt(&mut self) -> Result { + pub fn parse_raise_stmt(&mut self) -> Result { self.expect_keyword_is(Keyword::RAISE)?; let value = if self.parse_keywords(&[Keyword::USING, Keyword::MESSAGE]) { @@ -828,9 +890,11 @@ impl<'a> Parser<'a> { self.maybe_parse(|parser| parser.parse_expr().map(RaiseStatementValue::Expr))? }; - Ok(Statement::Raise(RaiseStatement { value })) + Ok(RaiseStatement { value }) } - + /// Parse a COMMENT statement. + /// + /// See [Statement::Comment] pub fn parse_comment(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); @@ -841,23 +905,51 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::COLUMN => { (CommentObject::Column, self.parse_object_name(false)?) } - Token::Word(w) if w.keyword == Keyword::TABLE => { - (CommentObject::Table, self.parse_object_name(false)?) + Token::Word(w) if w.keyword == Keyword::DATABASE => { + (CommentObject::Database, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::DOMAIN => { + (CommentObject::Domain, self.parse_object_name(false)?) } Token::Word(w) if w.keyword == Keyword::EXTENSION => { (CommentObject::Extension, self.parse_object_name(false)?) } + Token::Word(w) if w.keyword == Keyword::FUNCTION => { + (CommentObject::Function, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::INDEX => { + (CommentObject::Index, self.parse_object_name(false)?) 
+ } + Token::Word(w) if w.keyword == Keyword::MATERIALIZED => { + self.expect_keyword_is(Keyword::VIEW)?; + ( + CommentObject::MaterializedView, + self.parse_object_name(false)?, + ) + } + Token::Word(w) if w.keyword == Keyword::PROCEDURE => { + (CommentObject::Procedure, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::ROLE => { + (CommentObject::Role, self.parse_object_name(false)?) + } Token::Word(w) if w.keyword == Keyword::SCHEMA => { (CommentObject::Schema, self.parse_object_name(false)?) } - Token::Word(w) if w.keyword == Keyword::DATABASE => { - (CommentObject::Database, self.parse_object_name(false)?) + Token::Word(w) if w.keyword == Keyword::SEQUENCE => { + (CommentObject::Sequence, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::TABLE => { + (CommentObject::Table, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::TYPE => { + (CommentObject::Type, self.parse_object_name(false)?) } Token::Word(w) if w.keyword == Keyword::USER => { (CommentObject::User, self.parse_object_name(false)?) } - Token::Word(w) if w.keyword == Keyword::ROLE => { - (CommentObject::Role, self.parse_object_name(false)?) + Token::Word(w) if w.keyword == Keyword::VIEW => { + (CommentObject::View, self.parse_object_name(false)?) } _ => self.expected("comment object_type", token)?, }; @@ -876,6 +968,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `FLUSH` statement. 
pub fn parse_flush(&mut self) -> Result { let mut channel = None; let mut tables: Vec = vec![]; @@ -883,7 +976,10 @@ impl<'a> Parser<'a> { let mut export = false; if !dialect_of!(self is MySqlDialect | GenericDialect) { - return parser_err!("Unsupported statement FLUSH", self.peek_token().span.start); + return parser_err!( + "Unsupported statement FLUSH", + self.peek_token_ref().span.start + ); } let location = if self.parse_keyword(Keyword::NO_WRITE_TO_BINLOG) { @@ -946,10 +1042,10 @@ impl<'a> Parser<'a> { FlushType::Tables } else { - return self.expected( + return self.expected_ref( "BINARY LOGS, ENGINE LOGS, ERROR LOGS, GENERAL LOGS, HOSTS, LOGS, PRIVILEGES, OPTIMIZER_COSTS,\ RELAY LOGS [FOR CHANNEL channel], SLOW LOGS, STATUS, USER_RESOURCES", - self.peek_token(), + self.peek_token_ref(), ); }; @@ -963,7 +1059,8 @@ impl<'a> Parser<'a> { }) } - pub fn parse_msck(&mut self) -> Result { + /// Parse `MSCK` statement. + pub fn parse_msck(&mut self) -> Result { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword_is(Keyword::TABLE)?; let table_name = self.parse_object_name(false)?; @@ -987,20 +1084,24 @@ impl<'a> Parser<'a> { repair, table_name, partition_action, - } - .into()) + }) } - pub fn parse_truncate(&mut self) -> Result { + /// Parse `TRUNCATE` statement. + pub fn parse_truncate(&mut self) -> Result { let table = self.parse_keyword(Keyword::TABLE); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let table_names = self - .parse_comma_separated(|p| { - Ok((p.parse_keyword(Keyword::ONLY), p.parse_object_name(false)?)) - })? 
- .into_iter() - .map(|(only, name)| TruncateTableTarget { name, only }) - .collect(); + let table_names = self.parse_comma_separated(|p| { + let only = p.parse_keyword(Keyword::ONLY); + let name = p.parse_object_name(false)?; + let has_asterisk = p.consume_token(&Token::Mul); + Ok(TruncateTableTarget { + name, + only, + has_asterisk, + }) + })?; let mut partitions = None; if self.parse_keyword(Keyword::PARTITION) { @@ -1030,11 +1131,11 @@ impl<'a> Parser<'a> { table_names, partitions, table, + if_exists, identity, cascade, on_cluster, - } - .into()) + }) } fn parse_cascade_option(&mut self) -> Option { @@ -1047,6 +1148,7 @@ impl<'a> Parser<'a> { } } + /// Parse options for `ATTACH DUCKDB DATABASE` statement. pub fn parse_attach_duckdb_database_options( &mut self, ) -> Result, ParserError> { @@ -1069,7 +1171,8 @@ impl<'a> Parser<'a> { let ident = self.parse_identifier()?; options.push(AttachDuckDBDatabaseOption::Type(ident)); } else { - return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); + return self + .expected_ref("expected one of: ), READ_ONLY, TYPE", self.peek_token_ref()); }; if self.consume_token(&Token::RParen) { @@ -1077,11 +1180,12 @@ impl<'a> Parser<'a> { } else if self.consume_token(&Token::Comma) { continue; } else { - return self.expected("expected one of: ')', ','", self.peek_token()); + return self.expected_ref("expected one of: ')', ','", self.peek_token_ref()); } } } + /// Parse `ATTACH DUCKDB DATABASE` statement. pub fn parse_attach_duckdb_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -1102,6 +1206,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `DETACH DUCKDB DATABASE` statement. 
pub fn parse_detach_duckdb_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); @@ -1113,6 +1218,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `ATTACH DATABASE` statement. pub fn parse_attach_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let database_file_name = self.parse_expr()?; @@ -1125,15 +1231,23 @@ impl<'a> Parser<'a> { }) } - pub fn parse_analyze(&mut self) -> Result { + /// Parse `ANALYZE` statement. + pub fn parse_analyze(&mut self) -> Result { let has_table_keyword = self.parse_keyword(Keyword::TABLE); - let table_name = self.parse_object_name(false)?; + let table_name = self.maybe_parse(|parser| parser.parse_object_name(false))?; let mut for_columns = false; let mut cache_metadata = false; let mut noscan = false; let mut partitions = None; let mut compute_statistics = false; let mut columns = vec![]; + + // PostgreSQL syntax: ANALYZE t (col1, col2) + if table_name.is_some() && self.consume_token(&Token::LParen) { + columns = self.parse_comma_separated(|p| p.parse_identifier())?; + self.expect_token(&Token::RParen)?; + } + loop { match self.parse_one_of_keywords(&[ Keyword::PARTITION, @@ -1179,8 +1293,7 @@ impl<'a> Parser<'a> { cache_metadata, noscan, compute_statistics, - } - .into()) + }) } /// Parse a new expression including wildcard & qualified wildcard. 
@@ -1190,11 +1303,15 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { - if self.peek_token().token == Token::Period { + if self.peek_token_ref().token == Token::Period { let mut id_parts: Vec = vec![match t { Token::Word(w) => w.into_ident(next_token.span), Token::SingleQuotedString(s) => Ident::with_quote('\'', s), - _ => unreachable!(), // We matched above + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected token type".to_string(), + )) + } }]; while self.consume_token(&Token::Period) { @@ -1205,6 +1322,11 @@ impl<'a> Parser<'a> { // SQLite has single-quoted identifiers id_parts.push(Ident::with_quote('\'', s)) } + Token::Placeholder(s) => { + // Snowflake uses $1, $2, etc. for positional column references + // in staged data queries like: SELECT t.$1 FROM @stage t + id_parts.push(Ident::new(s)) + } Token::Mul => { return Ok(Expr::QualifiedWildcard( ObjectName::from(id_parts), @@ -1222,6 +1344,15 @@ impl<'a> Parser<'a> { Token::Mul => { return Ok(Expr::Wildcard(AttachedToken(next_token))); } + // Handle parenthesized wildcard: (*) + Token::LParen => { + let [maybe_mul, maybe_rparen] = self.peek_tokens_ref(); + if maybe_mul.token == Token::Mul && maybe_rparen.token == Token::RParen { + let mul_token = self.next_token(); // consume Mul + self.next_token(); // consume RParen + return Ok(Expr::Wildcard(AttachedToken(mul_token))); + } + } _ => (), }; @@ -1234,6 +1365,7 @@ impl<'a> Parser<'a> { self.parse_subexpr(self.dialect.prec_unknown()) } + /// Parse expression with optional alias and order by. pub fn parse_expr_with_alias_and_order_by( &mut self, ) -> Result { @@ -1254,6 +1386,7 @@ impl<'a> Parser<'a> { } /// Parse tokens until the precedence changes. 
+ #[cfg_attr(feature = "recursive-protection", recursive::recursive)] pub fn parse_subexpr(&mut self, precedence: u8) -> Result { let _guard = self.recursion_counter.try_decrease()?; debug!("parsing expr"); @@ -1281,6 +1414,7 @@ impl<'a> Parser<'a> { Ok(expr) } + /// Parse `ASSERT` statement. pub fn parse_assert(&mut self) -> Result { let condition = self.parse_expr()?; let message = if self.parse_keyword(Keyword::AS) { @@ -1292,11 +1426,13 @@ impl<'a> Parser<'a> { Ok(Statement::Assert { condition, message }) } + /// Parse `SAVEPOINT` statement. pub fn parse_savepoint(&mut self) -> Result { let name = self.parse_identifier()?; Ok(Statement::Savepoint { name }) } + /// Parse `RELEASE` statement. pub fn parse_release(&mut self) -> Result { let _ = self.parse_keyword(Keyword::SAVEPOINT); let name = self.parse_identifier()?; @@ -1304,11 +1440,13 @@ impl<'a> Parser<'a> { Ok(Statement::ReleaseSavepoint { name }) } + /// Parse `LISTEN` statement. pub fn parse_listen(&mut self) -> Result { let channel = self.parse_identifier()?; Ok(Statement::LISTEN { channel }) } + /// Parse `UNLISTEN` statement. pub fn parse_unlisten(&mut self) -> Result { let channel = if self.consume_token(&Token::Mul) { Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) @@ -1317,13 +1455,14 @@ impl<'a> Parser<'a> { Ok(expr) => expr, _ => { self.prev_token(); - return self.expected("wildcard or identifier", self.peek_token()); + return self.expected_ref("wildcard or identifier", self.peek_token_ref()); } } }; Ok(Statement::UNLISTEN { channel }) } + /// Parse `NOTIFY` statement. 
pub fn parse_notify(&mut self) -> Result { let channel = self.parse_identifier()?; let payload = if self.consume_token(&Token::Comma) { @@ -1345,9 +1484,9 @@ impl<'a> Parser<'a> { Ok(RenameTable { old_name, new_name }) })?; - Ok(Statement::RenameTable(rename_tables)) + Ok(rename_tables.into()) } else { - self.expected("KEYWORD `TABLE` after RENAME", self.peek_token()) + self.expected_ref("KEYWORD `TABLE` after RENAME", self.peek_token_ref()) } } @@ -1374,7 +1513,7 @@ impl<'a> Parser<'a> { if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { Ok(Some(Expr::Function(Function { - name: ObjectName::from(vec![w.clone().into_ident(w_span)]), + name: ObjectName::from(vec![w.to_ident(w_span)]), uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, @@ -1389,7 +1528,7 @@ impl<'a> Parser<'a> { | Keyword::CURRENT_DATE | Keyword::LOCALTIME | Keyword::LOCALTIMESTAMP => { - Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.clone().into_ident(w_span)]))?)) + Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.to_ident(w_span)]))?)) } Keyword::CASE => Ok(Some(self.parse_case_expr()?)), Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)), @@ -1414,7 +1553,7 @@ impl<'a> Parser<'a> { Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)), Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)), Keyword::POSITION if self.peek_token_ref().token == Token::LParen => { - Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?)) + Ok(Some(self.parse_position_expr(w.to_ident(w_span))?)) } Keyword::SUBSTR | Keyword::SUBSTRING => { self.prev_token(); @@ -1429,14 +1568,14 @@ impl<'a> Parser<'a> { Ok(Some(self.parse_array_expr(true)?)) } Keyword::ARRAY - if self.peek_token() == Token::LParen + if self.peek_token_ref().token == Token::LParen && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => { self.expect_token(&Token::LParen)?; let query = self.parse_query()?; 
self.expect_token(&Token::RParen)?; Ok(Some(Expr::Function(Function { - name: ObjectName::from(vec![w.clone().into_ident(w_span)]), + name: ObjectName::from(vec![w.to_ident(w_span)]), uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::Subquery(query), @@ -1461,6 +1600,9 @@ impl<'a> Parser<'a> { Keyword::MAP if *self.peek_token_ref() == Token::LBrace && self.dialect.support_map_literal_syntax() => { Ok(Some(self.parse_duckdb_map_literal()?)) } + Keyword::LAMBDA if self.dialect.supports_lambda_functions() => { + Ok(Some(self.parse_lambda_expr()?)) + } _ if self.dialect.supports_geometric_types() => match w.keyword { Keyword::CIRCLE => Ok(Some(self.parse_geometric_type(GeometricTypeKind::Circle)?)), Keyword::BOX => Ok(Some(self.parse_geometric_type(GeometricTypeKind::GeometricBox)?)), @@ -1481,9 +1623,10 @@ impl<'a> Parser<'a> { w: &Word, w_span: Span, ) -> Result { - match self.peek_token().token { - Token::LParen if !self.peek_outer_join_operator() => { - let id_parts = vec![w.clone().into_ident(w_span)]; + let is_outer_join = self.peek_outer_join_operator(); + match &self.peek_token_ref().token { + Token::LParen if !is_outer_join => { + let id_parts = vec![w.to_ident(w_span)]; self.parse_function(ObjectName::from(id_parts)) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html @@ -1493,7 +1636,7 @@ impl<'a> Parser<'a> { if w.value.starts_with('_') => { Ok(Expr::Prefixed { - prefix: w.clone().into_ident(w_span), + prefix: w.to_ident(w_span), value: self.parse_introduced_string_expr()?.into(), }) } @@ -1504,18 +1647,43 @@ impl<'a> Parser<'a> { if w.value.starts_with('_') => { Ok(Expr::Prefixed { - prefix: w.clone().into_ident(w_span), + prefix: w.to_ident(w_span), value: self.parse_introduced_string_expr()?.into(), }) } + // An unreserved word (likely an identifier) is followed by an arrow, + // which indicates a lambda function with a single, untyped parameter. + // For example: `a -> a * 2`. 
Token::Arrow if self.dialect.supports_lambda_functions() => { self.expect_token(&Token::Arrow)?; Ok(Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::One(w.clone().into_ident(w_span)), + params: OneOrManyWithParens::One(LambdaFunctionParameter { + name: w.to_ident(w_span), + data_type: None, + }), + body: Box::new(self.parse_expr()?), + syntax: LambdaSyntax::Arrow, + })) + } + // An unreserved word (likely an identifier) that is followed by another word (likely a data type) + // which is then followed by an arrow, which indicates a lambda function with a single, typed parameter. + // For example: `a INT -> a * 2`. + Token::Word(_) + if self.dialect.supports_lambda_functions() + && self.peek_nth_token_ref(1).token == Token::Arrow => + { + let data_type = self.parse_data_type()?; + self.expect_token(&Token::Arrow)?; + Ok(Expr::Lambda(LambdaFunction { + params: OneOrManyWithParens::One(LambdaFunctionParameter { + name: w.to_ident(w_span), + data_type: Some(data_type), + }), body: Box::new(self.parse_expr()?), + syntax: LambdaSyntax::Arrow, })) } - _ => Ok(Expr::Identifier(w.clone().into_ident(w_span))), + _ => Ok(Expr::Identifier(w.to_ident(w_span))), } } @@ -1554,6 +1722,16 @@ impl<'a> Parser<'a> { // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the // `type 'string'` syntax for the custom data types at all. DataType::Custom(..) => parser_err!("dummy", loc), + // MySQL supports using the `BINARY` keyword as a cast to binary type. + DataType::Binary(..) 
if self.dialect.supports_binary_kw_as_cast() => { + Ok(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(parser.parse_expr()?), + data_type: DataType::Binary(None), + array: false, + format: None, + }) + } data_type => Ok(Expr::TypedString(TypedString { data_type, value: parser.parse_value()?, @@ -1641,7 +1819,11 @@ impl<'a> Parser<'a> { Token::PGSquareRoot => UnaryOperator::PGSquareRoot, Token::PGCubeRoot => UnaryOperator::PGCubeRoot, Token::AtSign => UnaryOperator::PGAbs, - _ => unreachable!(), + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected unary operator token".to_string(), + )) + } }; Ok(Expr::UnaryOp { op, @@ -1704,23 +1886,41 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } Token::LParen => { - let expr = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Some(lambda) = self.try_parse_lambda()? { - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; + let expr = + if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Some(lambda) = self.try_parse_lambda()? { + return Ok(lambda); + } else { + // Parentheses in expressions switch to "normal" parsing state. + // This matters for dialects (SQLite, DuckDB) where `NOT NULL` can + // be an alias for `IS NOT NULL`. In column definitions like: + // + // CREATE TABLE t (c INT DEFAULT (42 NOT NULL) NOT NULL) + // + // The `(42 NOT NULL)` is an expression with parens, so it parses + // as `IsNotNull(42)`. 
The trailing `NOT NULL` is outside those + // expression parens (the outer parens are CREATE TABLE syntax), + // so it remains a column constraint. + let exprs = self.with_state(ParserState::Normal, |p| { + p.parse_comma_separated(Parser::parse_expr) + })?; + match exprs.len() { + 0 => return Err(ParserError::ParserError( + "Internal parser error: parse_comma_separated returned empty list" + .to_string(), + )), + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; self.expect_token(&Token::RParen)?; Ok(expr) } @@ -1791,26 +1991,63 @@ impl<'a> Parser<'a> { chain.push(AccessExpr::Dot(expr)); self.advance_token(); // The consumed string } - // Fallback to parsing an arbitrary expression. - _ => match self.parse_subexpr(self.dialect.prec_value(Precedence::Period))? { - // If we get back a compound field access or identifier, - // we flatten the nested expression. - // For example if the current root is `foo` - // and we get back a compound identifier expression `bar.baz` - // The full expression should be `foo.bar.baz` (i.e. - // a root with an access chain with 2 entries) and not - // `foo.(bar.baz)` (i.e. a root with an access chain with - // 1 entry`). - Expr::CompoundFieldAccess { root, access_chain } => { - chain.push(AccessExpr::Dot(*root)); - chain.extend(access_chain); - } - Expr::CompoundIdentifier(parts) => chain - .extend(parts.into_iter().map(Expr::Identifier).map(AccessExpr::Dot)), - expr => { - chain.push(AccessExpr::Dot(expr)); + Token::Placeholder(s) => { + // Snowflake uses $1, $2, etc. for positional column references + // in staged data queries like: SELECT t.$1 FROM @stage t + let expr = Expr::Identifier(Ident::with_span(next_token.span, s)); + chain.push(AccessExpr::Dot(expr)); + self.advance_token(); // The consumed placeholder + } + // Fallback to parsing an arbitrary expression, but restrict to expression + // types that are valid after the dot operator. This ensures that e.g. 
+ // `T.interval` is parsed as a compound identifier, not as an interval + // expression. + _ => { + let expr = self.maybe_parse(|parser| { + let expr = parser + .parse_subexpr(parser.dialect.prec_value(Precedence::Period))?; + match &expr { + Expr::CompoundFieldAccess { .. } + | Expr::CompoundIdentifier(_) + | Expr::Identifier(_) + | Expr::Value(_) + | Expr::Function(_) => Ok(expr), + _ => parser.expected_ref( + "an identifier or value", + parser.peek_token_ref(), + ), + } + })?; + + match expr { + // If we get back a compound field access or identifier, + // we flatten the nested expression. + // For example if the current root is `foo` + // and we get back a compound identifier expression `bar.baz` + // The full expression should be `foo.bar.baz` (i.e. + // a root with an access chain with 2 entries) and not + // `foo.(bar.baz)` (i.e. a root with an access chain with + // 1 entry`). + Some(Expr::CompoundFieldAccess { root, access_chain }) => { + chain.push(AccessExpr::Dot(*root)); + chain.extend(access_chain); + } + Some(Expr::CompoundIdentifier(parts)) => chain.extend( + parts.into_iter().map(Expr::Identifier).map(AccessExpr::Dot), + ), + Some(expr) => { + chain.push(AccessExpr::Dot(expr)); + } + // If the expression is not a valid suffix, fall back to + // parsing as an identifier. This handles cases like `T.interval` + // where `interval` is a keyword but should be treated as an identifier. 
+ None => { + chain.push(AccessExpr::Dot(Expr::Identifier( + self.parse_identifier()?, + ))); + } } - }, + } } } else if !self.dialect.supports_partiql() && self.peek_token_ref().token == Token::LBracket @@ -1824,7 +2061,8 @@ impl<'a> Parser<'a> { let tok_index = self.get_current_index(); if let Some(wildcard_token) = ending_wildcard { if !Self::is_all_ident(&root, &chain) { - return self.expected("an identifier or a '*' after '.'", self.peek_token()); + return self + .expected_ref("an identifier or a '*' after '.'", self.peek_token_ref()); }; Ok(Expr::QualifiedWildcard( ObjectName::from(Self::exprs_to_idents(root, chain)?), @@ -2000,6 +2238,7 @@ impl<'a> Parser<'a> { && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) } + /// Parse utility options in the form of `(option1, option2 arg2, option3 arg3, ...)` pub fn parse_utility_options(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Self::parse_utility_option)?; @@ -2011,8 +2250,8 @@ impl<'a> Parser<'a> { fn parse_utility_option(&mut self) -> Result { let name = self.parse_identifier()?; - let next_token = self.peek_token(); - if next_token == Token::Comma || next_token == Token::RParen { + let next_token = self.peek_token_ref(); + if next_token == &Token::Comma || next_token == &Token::RParen { return Ok(UtilityOption { name, arg: None }); } let arg = self.parse_expr()?; @@ -2036,17 +2275,73 @@ impl<'a> Parser<'a> { return Ok(None); } self.maybe_parse(|p| { - let params = p.parse_comma_separated(|p| p.parse_identifier())?; + let params = p.parse_comma_separated(|p| p.parse_lambda_function_parameter())?; p.expect_token(&Token::RParen)?; p.expect_token(&Token::Arrow)?; let expr = p.parse_expr()?; Ok(Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::Many(params), body: Box::new(expr), + syntax: LambdaSyntax::Arrow, })) }) } + /// Parses a lambda expression following the `LAMBDA` keyword syntax. 
+ /// + /// Syntax: `LAMBDA <params> : <expr>` + /// + /// Examples: + /// - `LAMBDA x : x + 1` + /// - `LAMBDA x, i : x > i` + /// + /// See + fn parse_lambda_expr(&mut self) -> Result { + // Parse the parameters: either a single identifier or comma-separated identifiers + let params = self.parse_lambda_function_parameters()?; + // Expect the colon separator + self.expect_token(&Token::Colon)?; + // Parse the body expression + let body = self.parse_expr()?; + Ok(Expr::Lambda(LambdaFunction { + params, + body: Box::new(body), + syntax: LambdaSyntax::LambdaKeyword, + })) + } + + /// Parses the parameters of a lambda function with optional typing. + fn parse_lambda_function_parameters( + &mut self, + ) -> Result, ParserError> { + // Parse the parameters: either a single identifier or comma-separated identifiers + let params = if self.consume_token(&Token::LParen) { + // Parenthesized parameters: (x, y) + let params = self.parse_comma_separated(|p| p.parse_lambda_function_parameter())?; + self.expect_token(&Token::RParen)?; + OneOrManyWithParens::Many(params) + } else { + // Unparenthesized parameters: x or x, y + let params = self.parse_comma_separated(|p| p.parse_lambda_function_parameter())?; + if params.len() == 1 { + OneOrManyWithParens::One(params.into_iter().next().unwrap()) + } else { + OneOrManyWithParens::Many(params) + } + }; + Ok(params) + } + + /// Parses a single parameter of a lambda function, with optional typing. + fn parse_lambda_function_parameter(&mut self) -> Result { + let name = self.parse_identifier()?; + let data_type = match &self.peek_token_ref().token { + Token::Word(_) => self.maybe_parse(|p| p.parse_data_type())?, + _ => None, + }; + Ok(LambdaFunctionParameter { name, data_type }) + } + /// Tries to parse the body of an [ODBC escaping sequence] /// i.e. without the enclosing braces /// Currently implemented: @@ -2109,6 +2404,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a function call expression named by `name` and return it as an `Expr`. 
pub fn parse_function(&mut self, name: ObjectName) -> Result { self.parse_function_call(name).map(Expr::Function) } @@ -2118,7 +2414,7 @@ impl<'a> Parser<'a> { // Snowflake permits a subquery to be passed as an argument without // an enclosing set of parens if it's the only argument. - if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { + if self.dialect.supports_subquery_as_function_arg() && self.peek_sub_query() { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; return Ok(Function { @@ -2217,6 +2513,7 @@ impl<'a> Parser<'a> { } } + /// Parse time-related function `name` possibly followed by `(...)` arguments. pub fn parse_time_functions(&mut self, name: ObjectName) -> Result { let args = if self.consume_token(&Token::LParen) { FunctionArguments::List(self.parse_function_argument_list()?) @@ -2235,6 +2532,7 @@ impl<'a> Parser<'a> { })) } + /// Parse window frame `UNITS` clause: `ROWS`, `RANGE`, or `GROUPS`. pub fn parse_window_frame_units(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -2248,6 +2546,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `WINDOW` frame definition (units and bounds). pub fn parse_window_frame(&mut self) -> Result { let units = self.parse_window_frame_units()?; let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { @@ -2265,7 +2564,7 @@ impl<'a> Parser<'a> { }) } - /// Parse `CURRENT ROW` or `{ | UNBOUNDED } { PRECEDING | FOLLOWING }` + /// Parse a window frame bound: `CURRENT ROW` or ` PRECEDING|FOLLOWING`. 
pub fn parse_window_frame_bound(&mut self) -> Result { if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) { Ok(WindowFrameBound::CurrentRow) @@ -2273,7 +2572,7 @@ impl<'a> Parser<'a> { let rows = if self.parse_keyword(Keyword::UNBOUNDED) { None } else { - Some(Box::new(match self.peek_token().token { + Some(Box::new(match &self.peek_token_ref().token { Token::SingleQuotedString(_) => self.parse_interval()?, _ => self.parse_expr()?, })) @@ -2283,7 +2582,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::FOLLOWING) { Ok(WindowFrameBound::Following(rows)) } else { - self.expected("PRECEDING or FOLLOWING", self.peek_token()) + self.expected_ref("PRECEDING or FOLLOWING", self.peek_token_ref()) } } } @@ -2354,6 +2653,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CASE` expression and return an [`Expr::Case`]. pub fn parse_case_expr(&mut self) -> Result { let case_token = AttachedToken(self.get_current_token().clone()); let mut operand = None; @@ -2386,9 +2686,10 @@ impl<'a> Parser<'a> { }) } + /// Parse an optional `FORMAT` clause for `CAST` expressions. pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::FORMAT) { - let value = self.parse_value()?.value; + let value = self.parse_value()?; match self.parse_optional_time_zone()? { Some(tz) => Ok(Some(CastFormat::ValueAtTimeZone(value, tz))), None => Ok(Some(CastFormat::Value(value))), @@ -2398,9 +2699,10 @@ impl<'a> Parser<'a> { } } - pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { + /// Parse an optional `AT TIME ZONE` clause. 
+ pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { - self.parse_value().map(|v| Some(v.value)) + self.parse_value().map(Some) } else { Ok(None) } @@ -2474,12 +2776,14 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword_is(Keyword::AS)?; let data_type = self.parse_data_type()?; + let array = self.parse_keyword(Keyword::ARRAY); let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { kind, expr: Box::new(expr), data_type, + array, format, }) } @@ -2495,14 +2799,14 @@ impl<'a> Parser<'a> { Ok(exists_node) } + /// Parse a SQL `EXTRACT` expression e.g. `EXTRACT(YEAR FROM date)`. pub fn parse_extract_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let field = self.parse_date_time_field()?; let syntax = if self.parse_keyword(Keyword::FROM) { ExtractSyntax::From - } else if self.consume_token(&Token::Comma) - && dialect_of!(self is SnowflakeDialect | GenericDialect) + } else if self.dialect.supports_extract_comma_syntax() && self.consume_token(&Token::Comma) { ExtractSyntax::Comma } else { @@ -2520,6 +2824,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `CEIL` or `FLOOR` expression. pub fn parse_ceil_floor_expr(&mut self, is_ceil: bool) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; @@ -2529,13 +2834,13 @@ impl<'a> Parser<'a> { CeilFloorKind::DateTimeField(self.parse_date_time_field()?) 
} else if self.consume_token(&Token::Comma) { // Parse `CEIL/FLOOR(expr, scale)` - match self.parse_value()?.value { - Value::Number(n, s) => CeilFloorKind::Scale(Value::Number(n, s)), - _ => { - return Err(ParserError::ParserError( - "Scale field can only be of number type".to_string(), - )) - } + let v = self.parse_value()?; + if matches!(v.value, Value::Number(_, _)) { + CeilFloorKind::Scale(v) + } else { + return Err(ParserError::ParserError( + "Scale field can only be of number type".to_string(), + )); } } else { CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) @@ -2554,6 +2859,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `POSITION` expression. pub fn parse_position_expr(&mut self, ident: Ident) -> Result { let between_prec = self.dialect.prec_value(Precedence::Between); let position_expr = self.maybe_parse(|p| { @@ -2578,14 +2884,14 @@ impl<'a> Parser<'a> { } } - // { SUBSTRING | SUBSTR } ( [FROM 1] [FOR 3]) + /// Parse `SUBSTRING`/`SUBSTR` expressions: `SUBSTRING(expr FROM start FOR length)` or `SUBSTR(expr, start, length)`. pub fn parse_substring(&mut self) -> Result { let shorthand = match self.expect_one_of_keywords(&[Keyword::SUBSTR, Keyword::SUBSTRING])? { Keyword::SUBSTR => true, Keyword::SUBSTRING => false, _ => { self.prev_token(); - return self.expected("SUBSTR or SUBSTRING", self.peek_token()); + return self.expected_ref("SUBSTR or SUBSTRING", self.peek_token_ref()); } }; self.expect_token(&Token::LParen)?; @@ -2611,6 +2917,9 @@ impl<'a> Parser<'a> { }) } + /// Parse an OVERLAY expression. 
+ /// + /// See [Expr::Overlay] pub fn parse_overlay_expr(&mut self) -> Result { // PARSE OVERLAY (EXPR PLACING EXPR FROM 1 [FOR 3]) self.expect_token(&Token::LParen)?; @@ -2636,12 +2945,12 @@ impl<'a> Parser<'a> { /// ```sql /// TRIM ([WHERE] ['text' FROM] 'text') /// TRIM ('text') - /// TRIM(, [, characters]) -- only Snowflake or BigQuery + /// TRIM(, [, characters]) -- PostgreSQL, DuckDB, Snowflake, BigQuery, Generic /// ``` pub fn parse_trim_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let mut trim_where = None; - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING].contains(&word.keyword) { trim_where = Some(self.parse_trim_where()?); } @@ -2657,8 +2966,7 @@ impl<'a> Parser<'a> { trim_what: Some(trim_what), trim_characters: None, }) - } else if self.consume_token(&Token::Comma) - && dialect_of!(self is DuckDbDialect | SnowflakeDialect | BigQueryDialect | GenericDialect) + } else if self.dialect.supports_comma_separated_trim() && self.consume_token(&Token::Comma) { let characters = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; @@ -2679,6 +2987,9 @@ impl<'a> Parser<'a> { } } + /// Parse the `WHERE` field for a `TRIM` expression. + /// + /// See [TrimWhereField] pub fn parse_trim_where(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -2700,13 +3011,16 @@ impl<'a> Parser<'a> { Ok(Expr::Array(Array { elem: exprs, named })) } + /// Parse the `ON OVERFLOW` clause for `LISTAGG`. 
+ /// + /// See [`ListAggOnOverflow`] pub fn parse_listagg_on_overflow(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) { if self.parse_keyword(Keyword::ERROR) { Ok(Some(ListAggOnOverflow::Error)) } else { self.expect_keyword_is(Keyword::TRUNCATE)?; - let filler = match self.peek_token().token { + let filler = match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT => { @@ -2716,15 +3030,17 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), - _ => self.expected( + _ => self.expected_ref( "either filler, WITH, or WITHOUT in LISTAGG", - self.peek_token(), + self.peek_token_ref(), )?, }; let with_count = self.parse_keyword(Keyword::WITH); if !with_count && !self.parse_keyword(Keyword::WITHOUT) { - self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; + self.expected_ref("either WITH or WITHOUT in LISTAGG", self.peek_token_ref())?; } self.expect_keyword_is(Keyword::COUNT)?; Ok(Some(ListAggOnOverflow::Truncate { filler, with_count })) @@ -2734,10 +3050,12 @@ impl<'a> Parser<'a> { } } - // This function parses date/time fields for the EXTRACT function-like - // operator, interval qualifiers, and the ceil/floor operations. - // EXTRACT supports a wider set of date/time fields than interval qualifiers, - // so this function may need to be split in two. + /// Parse a date/time field for `EXTRACT`, interval qualifiers, and ceil/floor operations. + /// + /// `EXTRACT` supports a wider set of date/time fields than interval qualifiers, + /// so this function may need to be split in two. 
+ /// + /// See [`DateTimeField`] pub fn parse_date_time_field(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -2811,8 +3129,11 @@ impl<'a> Parser<'a> { } } + /// Parse a `NOT` expression. + /// + /// Represented in the AST as `Expr::UnaryOp` with `UnaryOperator::Not`. pub fn parse_not(&mut self) -> Result { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) => match w.keyword { Keyword::EXISTS => { let negated = true; @@ -2871,7 +3192,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; // MySQL is too permissive about the value, IMO we can't validate it perfectly on syntax level. - let match_value = self.parse_value()?.value; + let match_value = self.parse_value()?; let in_natural_language_mode_keywords = &[ Keyword::IN, @@ -2950,7 +3271,7 @@ impl<'a> Parser<'a> { } else if self.dialect.require_interval_qualifier() { return parser_err!( "INTERVAL requires a unit after the literal value", - self.peek_token().span.start + self.peek_token_ref().span.start ); } else { None @@ -2993,7 +3314,7 @@ impl<'a> Parser<'a> { /// Peek at the next token and determine if it is a temporal unit /// like `second`. pub fn next_token_is_temporal_unit(&mut self) -> bool { - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { matches!( word.keyword, Keyword::YEAR @@ -3051,7 +3372,7 @@ impl<'a> Parser<'a> { if trailing_bracket.0 { return parser_err!( "unmatched > in STRUCT literal", - self.peek_token().span.start + self.peek_token_ref().span.start ); } @@ -3083,7 +3404,7 @@ impl<'a> Parser<'a> { if typed_syntax { return parser_err!("Typed syntax does not allow AS", { self.prev_token(); - self.peek_token().span.start + self.peek_token_ref().span.start }); } let field_name = self.parse_identifier()?; @@ -3118,7 +3439,7 @@ impl<'a> Parser<'a> { self.expect_keyword_is(Keyword::STRUCT)?; // Nothing to do if we have no type information. 
- if Token::Lt != self.peek_token() { + if self.peek_token_ref().token != Token::Lt { return Ok((Default::default(), false.into())); } self.next_token(); @@ -3162,24 +3483,28 @@ impl<'a> Parser<'a> { /// /// ```sql /// [field_name] field_type + /// field_name: field_type /// ``` /// /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + /// [databricks]: https://docs.databricks.com/en/sql/language-manual/data-types/struct-type.html fn parse_struct_field_def( &mut self, ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { // Look beyond the next item to infer whether both field name // and type are specified. - let is_anonymous_field = !matches!( + let is_named_field = matches!( (self.peek_nth_token(0).token, self.peek_nth_token(1).token), - (Token::Word(_), Token::Word(_)) + (Token::Word(_), Token::Word(_)) | (Token::Word(_), Token::Colon) ); - let field_name = if is_anonymous_field { - None + let field_name = if is_named_field { + let name = self.parse_identifier()?; + let _ = self.consume_token(&Token::Colon); + Some(name) } else { - Some(self.parse_identifier()?) 
+ None }; let (field_type, trailing_bracket) = self.parse_data_type_helper()?; @@ -3290,7 +3615,8 @@ impl<'a> Parser<'a> { /// /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps fn parse_duckdb_map_field(&mut self) -> Result { - let key = self.parse_expr()?; + // Stop before `:` so it can act as a key/value separator + let key = self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?; self.expect_token(&Token::Colon)?; @@ -3356,7 +3682,7 @@ impl<'a> Parser<'a> { trailing_bracket: MatchedTrailingBracket, ) -> Result { let trailing_bracket = if !trailing_bracket.0 { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Gt => { self.next_token(); false.into() @@ -3365,7 +3691,7 @@ impl<'a> Parser<'a> { self.next_token(); true.into() } - _ => return self.expected(">", self.peek_token()), + _ => return self.expected_ref(">", self.peek_token_ref()), } } else { false.into() @@ -3418,10 +3744,10 @@ impl<'a> Parser<'a> { Token::DuckIntDiv if dialect_is!(dialect is DuckDbDialect | GenericDialect) => { Some(BinaryOperator::DuckIntegerDivide) } - Token::ShiftLeft if dialect_is!(dialect is PostgreSqlDialect | DuckDbDialect | GenericDialect | RedshiftSqlDialect) => { + Token::ShiftLeft if dialect.supports_bitwise_shift_operators() => { Some(BinaryOperator::PGBitwiseShiftLeft) } - Token::ShiftRight if dialect_is!(dialect is PostgreSqlDialect | DuckDbDialect | GenericDialect | RedshiftSqlDialect) => { + Token::ShiftRight if dialect.supports_bitwise_shift_operators() => { Some(BinaryOperator::PGBitwiseShiftRight) } Token::Sharp if dialect_is!(dialect is PostgreSqlDialect | RedshiftSqlDialect) => { @@ -3433,6 +3759,9 @@ impl<'a> Parser<'a> { Token::Overlap if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { Some(BinaryOperator::PGOverlap) } + Token::Overlap if dialect.supports_double_ampersand_operator() => { + Some(BinaryOperator::And) + } Token::CaretAt if dialect_is!(dialect is PostgreSqlDialect | 
GenericDialect) => { Some(BinaryOperator::PGStartsWith) } @@ -3591,7 +3920,9 @@ impl<'a> Parser<'a> { right: Box::new(right), is_some: keyword == Keyword::SOME, }, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{ALL, ANY, SOME}}, got {unexpected_keyword:?}"), + )), }) } else { Ok(Expr::BinaryOp { @@ -3629,9 +3960,9 @@ impl<'a> Parser<'a> { } else if let Ok(is_normalized) = self.parse_unicode_is_normalized(expr) { Ok(is_normalized) } else { - self.expected( + self.expected_ref( "[NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS", - self.peek_token(), + self.peek_token_ref(), ) } } @@ -3704,7 +4035,7 @@ impl<'a> Parser<'a> { escape_char: self.parse_escape_char()?, }) } else { - self.expected("IN or BETWEEN after NOT", self.peek_token()) + self.expected_ref("IN or BETWEEN after NOT", self.peek_token_ref()) } } Keyword::NOTNULL if dialect.supports_notnull_operator() => { @@ -3720,7 +4051,7 @@ impl<'a> Parser<'a> { array: Box::new(array), })) } else { - self.expected("OF after MEMBER", self.peek_token()) + self.expected_ref("OF after MEMBER", self.peek_token_ref()) } } // Can only happen if `get_next_precedence` got out of sync with this function @@ -3734,6 +4065,7 @@ impl<'a> Parser<'a> { kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, + array: false, format: None, }) } else if Token::ExclamationMark == *tok && self.dialect.supports_factorial_operator() { @@ -3742,8 +4074,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), }) } else if Token::LBracket == *tok && self.dialect.supports_partiql() - || (dialect_of!(self is SnowflakeDialect | GenericDialect | DatabricksDialect) - && Token::Colon == *tok) + || (Token::Colon == *tok) { self.prev_token(); self.parse_json_access(expr) @@ -3757,9 +4088,9 @@ impl<'a> Parser<'a> { } /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` - pub fn parse_escape_char(&mut 
self) -> Result, ParserError> { + pub fn parse_escape_char(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::ESCAPE) { - Ok(Some(self.parse_value()?.into())) + Ok(Some(self.parse_value()?)) } else { Ok(None) } @@ -3779,7 +4110,8 @@ impl<'a> Parser<'a> { let lower_bound = if self.consume_token(&Token::Colon) { None } else { - Some(self.parse_expr()?) + // parse expr until we hit a colon (or any token with lower precedence) + Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?) }; // check for end @@ -3807,7 +4139,8 @@ impl<'a> Parser<'a> { stride: None, }); } else { - Some(self.parse_expr()?) + // parse expr until we hit a colon (or any token with lower precedence) + Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?) }; // check for end @@ -3863,8 +4196,9 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double-quoted - quote_style: quote_style @ (Some('"') | None), + // path segments in SF dot notation can be unquoted or double-quoted; + // Databricks also supports backtick-quoted identifiers + quote_style: quote_style @ (Some('"') | Some('`') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. 
keyword: _, @@ -3896,6 +4230,12 @@ impl<'a> Parser<'a> { let mut has_colon = false; loop { match self.next_token().token { + Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => { + self.next_token(); + let key = self.parse_wildcard_expr()?; + self.expect_token(&Token::RBracket)?; + path.push(JsonPathElem::ColonBracket { key }); + } Token::Colon if path.is_empty() => { has_colon = true; if *self.peek_token_ref() == Token::LBracket { @@ -4003,6 +4343,7 @@ impl<'a> Parser<'a> { kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, + array: false, format: None, }) } @@ -4144,6 +4485,11 @@ impl<'a> Parser<'a> { }) } + /// Return nth token, possibly whitespace, that has not yet been processed. + fn peek_nth_token_no_skip_ref(&self, n: usize) -> &TokenWithSpan { + self.tokens.get(self.index + n).unwrap_or(&EOF_TOKEN) + } + /// Return true if the next tokens exactly `expected` /// /// Does not advance the current token. @@ -4273,6 +4619,9 @@ impl<'a> Parser<'a> { } #[must_use] + /// Check if the current token is the expected keyword without consuming it. + /// + /// Returns true if the current token matches the expected keyword. pub fn peek_keyword(&self, expected: Keyword) -> bool { matches!(&self.peek_token_ref().token, Token::Word(w) if expected == w.keyword) } @@ -4322,16 +4671,25 @@ impl<'a> Parser<'a> { /// consumed and returns false #[must_use] pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { - let index = self.index; + self.parse_keywords_indexed(keywords).is_some() + } + + /// Just like [Self::parse_keywords], but - upon success - returns the + /// token index of the first keyword. + #[must_use] + fn parse_keywords_indexed(&mut self, keywords: &[Keyword]) -> Option { + let start_index = self.index; + let mut first_keyword_index = None; for &keyword in keywords { if !self.parse_keyword(keyword) { - // println!("parse_keywords aborting .. 
did not find {:?}", keyword); - // reset index and return immediately - self.index = index; - return false; + self.index = start_index; + return None; + } + if first_keyword_index.is_none() { + first_keyword_index = Some(self.index.saturating_sub(1)); } } - true + first_keyword_index } /// If the current token is one of the given `keywords`, returns the keyword @@ -4479,6 +4837,7 @@ impl<'a> Parser<'a> { ) } + /// Parse a list of actions for `GRANT` statements. pub fn parse_actions_list(&mut self) -> Result, ParserError> { let mut values = vec![]; loop { @@ -4486,7 +4845,7 @@ impl<'a> Parser<'a> { if !self.consume_token(&Token::Comma) { break; } else if self.options.trailing_commas { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(kw) if kw.keyword == Keyword::ON => { break; } @@ -4628,6 +4987,7 @@ impl<'a> Parser<'a> { Ok(values) } + /// Parse an expression enclosed in parentheses. pub fn parse_parenthesized(&mut self, mut f: F) -> Result where F: FnMut(&mut Parser<'a>) -> Result, @@ -4648,7 +5008,7 @@ impl<'a> Parser<'a> { where F: FnMut(&mut Parser<'a>) -> Result, { - if self.peek_token().token == end_token { + if self.peek_token_ref().token == end_token { return Ok(vec![]); } @@ -4725,17 +5085,28 @@ impl<'a> Parser<'a> { /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. 
pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; - let all = self.parse_keyword(Keyword::ALL); - let distinct = self.parse_keyword(Keyword::DISTINCT); - if !distinct { - return Ok(None); - } - if all { - return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); - } - let on = self.parse_keyword(Keyword::ON); - if !on { + let loc = self.peek_token_ref().span.start; + let distinct = match self.parse_one_of_keywords(&[Keyword::ALL, Keyword::DISTINCT]) { + Some(Keyword::ALL) => { + if self.peek_keyword(Keyword::DISTINCT) { + return parser_err!("Cannot specify ALL then DISTINCT".to_string(), loc); + } + Some(Distinct::All) + } + Some(Keyword::DISTINCT) => { + if self.peek_keyword(Keyword::ALL) { + return parser_err!("Cannot specify DISTINCT then ALL".to_string(), loc); + } + Some(Distinct::Distinct) + } + None => return Ok(None), + _ => return parser_err!("ALL or DISTINCT", loc), + }; + + let Some(Distinct::Distinct) = distinct else { + return Ok(distinct); + }; + if !self.parse_keyword(Keyword::ON) { return Ok(Some(Distinct::Distinct)); } @@ -4770,43 +5141,49 @@ impl<'a> Parser<'a> { let persistent = dialect_of!(self is DuckDbDialect) && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); let create_view_params = self.parse_create_view_params()?; - if self.parse_keyword(Keyword::TABLE) { + if self.peek_keywords(&[Keyword::SNAPSHOT, Keyword::TABLE]) { + self.parse_create_snapshot_table().map(Into::into) + } else if self.parse_keyword(Keyword::TABLE) { self.parse_create_table(or_replace, temporary, global, transient) + .map(Into::into) } else if self.peek_keyword(Keyword::MATERIALIZED) || self.peek_keyword(Keyword::VIEW) || self.peek_keywords(&[Keyword::SECURE, Keyword::MATERIALIZED, Keyword::VIEW]) || self.peek_keywords(&[Keyword::SECURE, Keyword::VIEW]) { self.parse_create_view(or_alter, or_replace, temporary, create_view_params) + .map(Into::into) } else if 
self.parse_keyword(Keyword::POLICY) { - self.parse_create_policy() + self.parse_create_policy().map(Into::into) } else if self.parse_keyword(Keyword::EXTERNAL) { - self.parse_create_external_table(or_replace) + self.parse_create_external_table(or_replace).map(Into::into) } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_create_function(or_alter, or_replace, temporary) } else if self.parse_keyword(Keyword::DOMAIN) { - self.parse_create_domain() + self.parse_create_domain().map(Into::into) } else if self.parse_keyword(Keyword::TRIGGER) { self.parse_create_trigger(temporary, or_alter, or_replace, false) + .map(Into::into) } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) { self.parse_create_trigger(temporary, or_alter, or_replace, true) + .map(Into::into) } else if self.parse_keyword(Keyword::MACRO) { self.parse_create_macro(or_replace, temporary) } else if self.parse_keyword(Keyword::SECRET) { self.parse_create_secret(or_replace, temporary, persistent) } else if self.parse_keyword(Keyword::USER) { - self.parse_create_user(or_replace) + self.parse_create_user(or_replace).map(Into::into) } else if or_replace { - self.expected( + self.expected_ref( "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", - self.peek_token(), + self.peek_token_ref(), ) } else if self.parse_keyword(Keyword::EXTENSION) { - self.parse_create_extension() + self.parse_create_extension().map(Into::into) } else if self.parse_keyword(Keyword::INDEX) { - self.parse_create_index(false) + self.parse_create_index(false).map(Into::into) } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { - self.parse_create_index(true) + self.parse_create_index(true).map(Into::into) } else if self.parse_keyword(Keyword::VIRTUAL) { self.parse_create_virtual_table() } else if self.parse_keyword(Keyword::SCHEMA) { @@ -4814,7 +5191,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::DATABASE) { self.parse_create_database() } else if 
self.parse_keyword(Keyword::ROLE) { - self.parse_create_role() + self.parse_create_role().map(Into::into) } else if self.parse_keyword(Keyword::SEQUENCE) { self.parse_create_sequence(temporary) } else if self.parse_keyword(Keyword::TYPE) { @@ -4822,24 +5199,24 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::PROCEDURE) { self.parse_create_procedure(or_alter) } else if self.parse_keyword(Keyword::CONNECTOR) { - self.parse_create_connector() + self.parse_create_connector().map(Into::into) } else if self.parse_keyword(Keyword::OPERATOR) { // Check if this is CREATE OPERATOR FAMILY or CREATE OPERATOR CLASS if self.parse_keyword(Keyword::FAMILY) { - self.parse_create_operator_family() + self.parse_create_operator_family().map(Into::into) } else if self.parse_keyword(Keyword::CLASS) { - self.parse_create_operator_class() + self.parse_create_operator_class().map(Into::into) } else { - self.parse_create_operator() + self.parse_create_operator().map(Into::into) } } else if self.parse_keyword(Keyword::SERVER) { self.parse_pg_create_server() } else { - self.expected("an object type after CREATE", self.peek_token()) + self.expected_ref("an object type after CREATE", self.peek_token_ref()) } } - fn parse_create_user(&mut self, or_replace: bool) -> Result { + fn parse_create_user(&mut self, or_replace: bool) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; let options = self @@ -4851,7 +5228,7 @@ impl<'a> Parser<'a> { } else { vec![] }; - Ok(Statement::CreateUser(CreateUser { + Ok(CreateUser { or_replace, if_not_exists, name, @@ -4864,7 +5241,7 @@ impl<'a> Parser<'a> { options: tags, delimiter: KeyValueOptionsDelimiter::Comma, }, - })) + }) } /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. 
@@ -4878,7 +5255,7 @@ impl<'a> Parser<'a> { let mut storage_specifier = None; let mut name = None; - if self.peek_token() != Token::LParen { + if self.peek_token_ref().token != Token::LParen { if self.parse_keyword(Keyword::IN) { storage_specifier = self.parse_identifier().ok() } else { @@ -4887,7 +5264,7 @@ impl<'a> Parser<'a> { // Storage specifier may follow the name if storage_specifier.is_none() - && self.peek_token() != Token::LParen + && self.peek_token_ref().token != Token::LParen && self.parse_keyword(Keyword::IN) { storage_specifier = self.parse_identifier().ok(); @@ -4912,7 +5289,7 @@ impl<'a> Parser<'a> { (true, false) => Some(true), (false, true) => Some(false), (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + _ => self.expected_ref("TEMPORARY or PERSISTENT", self.peek_token_ref())?, }; Ok(Statement::CreateSecret { @@ -4931,14 +5308,14 @@ impl<'a> Parser<'a> { let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); if self.parse_keyword(Keyword::TABLE) { let table_name = self.parse_object_name(false)?; - if self.peek_token().token != Token::EOF { - if let Token::Word(word) = self.peek_token().token { + if self.peek_token_ref().token != Token::EOF { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == Keyword::OPTIONS { options = self.parse_options(Keyword::OPTIONS)? 
} }; - if self.peek_token().token != Token::EOF { + if self.peek_token_ref().token != Token::EOF { let (a, q) = self.parse_as_query()?; has_as = a; query = Some(q); @@ -4964,14 +5341,14 @@ impl<'a> Parser<'a> { table_flag = Some(self.parse_object_name(false)?); if self.parse_keyword(Keyword::TABLE) { let table_name = self.parse_object_name(false)?; - if self.peek_token() != Token::EOF { - if let Token::Word(word) = self.peek_token().token { + if self.peek_token_ref().token != Token::EOF { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == Keyword::OPTIONS { options = self.parse_options(Keyword::OPTIONS)? } }; - if self.peek_token() != Token::EOF { + if self.peek_token_ref().token != Token::EOF { let (a, q) = self.parse_as_query()?; has_as = a; query = Some(q); @@ -4994,17 +5371,17 @@ impl<'a> Parser<'a> { }) } } else { - if self.peek_token() == Token::EOF { + if self.peek_token_ref().token == Token::EOF { self.prev_token(); } - self.expected("a `TABLE` keyword", self.peek_token()) + self.expected_ref("a `TABLE` keyword", self.peek_token_ref()) } } } /// Parse 'AS' before as query,such as `WITH XXX AS SELECT XXX` oer `CACHE TABLE AS SELECT XXX` pub fn parse_as_query(&mut self) -> Result<(bool, Box), ParserError> { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(word) => match word.keyword { Keyword::AS => { self.next_token(); @@ -5012,7 +5389,7 @@ impl<'a> Parser<'a> { } _ => Ok((false, self.parse_query()?)), }, - _ => self.expected("a QUERY statement", self.peek_token()), + _ => self.expected_ref("a QUERY statement", self.peek_token_ref()), } } @@ -5047,6 +5424,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `CREATE SCHEMA` statement. pub fn parse_create_schema(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -5103,6 +5481,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CREATE DATABASE` statement. 
pub fn parse_create_database(&mut self) -> Result { let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let db_name = self.parse_object_name(false)?; @@ -5123,6 +5502,34 @@ impl<'a> Parser<'a> { None }; + // Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options + // + // Note: The docs only mention `CHARACTER SET`, but `CHARSET` is also supported. + // Furthermore, MySQL will only accept one character set, raising an error if there is more + // than one, but will accept multiple collations and use the last one. + // + // + let mut default_charset = None; + let mut default_collation = None; + loop { + let has_default = self.parse_keyword(Keyword::DEFAULT); + if default_charset.is_none() && self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) + || self.parse_keyword(Keyword::CHARSET) + { + let _ = self.consume_token(&Token::Eq); + default_charset = Some(self.parse_identifier()?.value); + } else if self.parse_keyword(Keyword::COLLATE) { + let _ = self.consume_token(&Token::Eq); + default_collation = Some(self.parse_identifier()?.value); + } else if has_default { + // DEFAULT keyword not followed by CHARACTER SET, CHARSET, or COLLATE + self.prev_token(); + break; + } else { + break; + } + } + Ok(Statement::CreateDatabase { db_name, if_not_exists: ine, @@ -5139,6 +5546,8 @@ impl<'a> Parser<'a> { default_ddl_collation: None, storage_serialization_policy: None, comment: None, + default_charset, + default_collation, catalog_sync: None, catalog_sync_namespace_mode: None, catalog_sync_namespace_flatten_delimiter: None, @@ -5147,6 +5556,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an optional `USING` clause for `CREATE FUNCTION`. pub fn parse_optional_create_function_using( &mut self, ) -> Result, ParserError> { @@ -5169,6 +5579,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CREATE FUNCTION` statement. 
pub fn parse_create_function( &mut self, or_alter: bool, @@ -5177,17 +5588,21 @@ impl<'a> Parser<'a> { ) -> Result { if dialect_of!(self is HiveDialect) { self.parse_hive_create_function(or_replace, temporary) + .map(Into::into) } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { self.parse_postgres_create_function(or_replace, temporary) + .map(Into::into) } else if dialect_of!(self is DuckDbDialect) { self.parse_create_macro(or_replace, temporary) } else if dialect_of!(self is BigQueryDialect) { self.parse_bigquery_create_function(or_replace, temporary) + .map(Into::into) } else if dialect_of!(self is MsSqlDialect) { self.parse_mssql_create_function(or_alter, or_replace, temporary) + .map(Into::into) } else { self.prev_token(); - self.expected("an object type after CREATE", self.peek_token()) + self.expected_ref("an object type after CREATE", self.peek_token_ref()) } } @@ -5198,7 +5613,7 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, temporary: bool, - ) -> Result { + ) -> Result { let name = self.parse_object_name(false)?; self.expect_token(&Token::LParen)?; @@ -5210,7 +5625,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) + Some(self.parse_function_return_type()?) 
} else { None }; @@ -5222,8 +5637,10 @@ impl<'a> Parser<'a> { function_body: Option, called_on_null: Option, parallel: Option, + security: Option, } let mut body = Body::default(); + let mut set_params: Vec = Vec::new(); loop { fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { if field.is_some() { @@ -5286,8 +5703,30 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SAFE) { body.parallel = Some(FunctionParallel::Safe); } else { - return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); + return self + .expected_ref("one of UNSAFE | RESTRICTED | SAFE", self.peek_token_ref()); + } + } else if self.parse_keyword(Keyword::SECURITY) { + ensure_not_set(&body.security, "SECURITY { DEFINER | INVOKER }")?; + if self.parse_keyword(Keyword::DEFINER) { + body.security = Some(FunctionSecurity::Definer); + } else if self.parse_keyword(Keyword::INVOKER) { + body.security = Some(FunctionSecurity::Invoker); + } else { + return self.expected_ref("DEFINER or INVOKER", self.peek_token_ref()); } + } else if self.parse_keyword(Keyword::SET) { + let name = self.parse_identifier()?; + let value = if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) { + FunctionSetValue::FromCurrent + } else { + if !self.consume_token(&Token::Eq) && !self.parse_keyword(Keyword::TO) { + return self.expected_ref("= or TO", self.peek_token_ref()); + } + let values = self.parse_comma_separated(Parser::parse_expr)?; + FunctionSetValue::Values(values) + }; + set_params.push(FunctionDefinitionSetParam { name, value }); } else if self.parse_keyword(Keyword::RETURN) { ensure_not_set(&body.function_body, "RETURN")?; body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); @@ -5296,7 +5735,7 @@ impl<'a> Parser<'a> { } } - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter: false, or_replace, temporary, @@ -5306,6 +5745,8 @@ impl<'a> Parser<'a> { behavior: body.behavior, called_on_null: body.called_on_null, 
parallel: body.parallel, + security: body.security, + set_params, language: body.language, function_body: body.function_body, if_not_exists: false, @@ -5313,7 +5754,7 @@ impl<'a> Parser<'a> { determinism_specifier: None, options: None, remote_connection: None, - })) + }) } /// Parse `CREATE FUNCTION` for [Hive] @@ -5323,14 +5764,14 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, temporary: bool, - ) -> Result { + ) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword_is(Keyword::AS)?; let body = self.parse_create_function_body_string()?; let using = self.parse_optional_create_function_using()?; - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter: false, or_replace, temporary, @@ -5343,11 +5784,13 @@ impl<'a> Parser<'a> { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], language: None, determinism_specifier: None, options: None, remote_connection: None, - })) + }) } /// Parse `CREATE FUNCTION` for [BigQuery] @@ -5357,12 +5800,12 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, temporary: bool, - ) -> Result { + ) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let (name, args) = self.parse_create_function_name_and_params()?; let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) + Some(self.parse_function_return_type()?) 
} else { None }; @@ -5408,7 +5851,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter: false, or_replace, temporary, @@ -5425,7 +5868,9 @@ impl<'a> Parser<'a> { behavior: None, called_on_null: None, parallel: None, - })) + security: None, + set_params: vec![], + }) } /// Parse `CREATE FUNCTION` for [MsSql] @@ -5436,7 +5881,7 @@ impl<'a> Parser<'a> { or_alter: bool, or_replace: bool, temporary: bool, - ) -> Result { + ) -> Result { let (name, args) = self.parse_create_function_name_and_params()?; self.expect_keyword(Keyword::RETURNS)?; @@ -5453,7 +5898,7 @@ impl<'a> Parser<'a> { } _ => parser_err!( "Expected table column definitions after TABLE keyword", - p.peek_token().span.start + p.peek_token_ref().span.start )?, }; @@ -5463,11 +5908,11 @@ impl<'a> Parser<'a> { }) })?; - let return_type = if return_table.is_some() { - return_table - } else { - Some(self.parse_data_type()?) + let data_type = match return_table { + Some(table_type) => table_type, + None => self.parse_data_type()?, }; + let return_type = Some(FunctionReturnType::DataType(data_type)); let _ = self.parse_keyword(Keyword::AS); @@ -5482,7 +5927,7 @@ impl<'a> Parser<'a> { end_token: AttachedToken(end_token), })) } else if self.parse_keyword(Keyword::RETURN) { - if self.peek_token() == Token::LParen { + if self.peek_token_ref().token == Token::LParen { Some(CreateFunctionBody::AsReturnExpr(self.parse_expr()?)) } else if self.peek_keyword(Keyword::SELECT) { let select = self.parse_select()?; @@ -5490,14 +5935,14 @@ impl<'a> Parser<'a> { } else { parser_err!( "Expected a subquery (or bare SELECT statement) after RETURN", - self.peek_token().span.start + self.peek_token_ref().span.start )? } } else { - parser_err!("Unparsable function body", self.peek_token().span.start)? + parser_err!("Unparsable function body", self.peek_token_ref().span.start)? 
}; - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter, or_replace, temporary, @@ -5514,7 +5959,17 @@ impl<'a> Parser<'a> { behavior: None, called_on_null: None, parallel: None, - })) + security: None, + set_params: vec![], + }) + } + + fn parse_function_return_type(&mut self) -> Result { + if self.parse_keyword(Keyword::SETOF) { + Ok(FunctionReturnType::SetOf(self.parse_data_type()?)) + } else { + Ok(FunctionReturnType::DataType(self.parse_data_type()?)) + } } fn parse_create_function_name_and_params( @@ -5570,7 +6025,7 @@ impl<'a> Parser<'a> { // This dummy error is ignored in `maybe_parse` parser_err!( "The DEFAULT keyword is not a type", - parser.peek_token().span.start + parser.peek_token_ref().span.start ) } else { parser.parse_data_type() @@ -5608,11 +6063,11 @@ impl<'a> Parser<'a> { /// ```sql /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] /// ``` - pub fn parse_drop_trigger(&mut self) -> Result { + pub fn parse_drop_trigger(&mut self) -> Result { if !dialect_of!(self is PostgreSqlDialect | SQLiteDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); - return self.expected("an object type after DROP", self.peek_token()); + return self.expected_ref("an object type after DROP", self.peek_token_ref()); } let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let trigger_name = self.parse_object_name(false)?; @@ -5621,32 +6076,34 @@ impl<'a> Parser<'a> { } else { None }; - let option = self - .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) - .map(|keyword| match keyword { - Keyword::CASCADE => ReferentialAction::Cascade, - Keyword::RESTRICT => ReferentialAction::Restrict, - _ => unreachable!(), - }); - Ok(Statement::DropTrigger(DropTrigger { + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + 
Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{CASCADE, RESTRICT}}, got {unexpected_keyword:?}"), + )), + None => None, + }; + Ok(DropTrigger { if_exists, trigger_name, table_name, option, - })) + }) } + /// Parse a `CREATE TRIGGER` statement. pub fn parse_create_trigger( &mut self, temporary: bool, or_alter: bool, or_replace: bool, is_constraint: bool, - ) -> Result { + ) -> Result { if !dialect_of!(self is PostgreSqlDialect | SQLiteDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); - return self.expected("an object type after CREATE", self.peek_token()); + return self.expected_ref("an object type after CREATE", self.peek_token_ref()); } let name = self.parse_object_name(false)?; @@ -5677,7 +6134,9 @@ impl<'a> Parser<'a> { match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? { Keyword::ROW => TriggerObject::Row, Keyword::STATEMENT => TriggerObject::Statement, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in ROW/STATEMENT"), + )), }; Some(if include_each { @@ -5722,10 +6181,10 @@ impl<'a> Parser<'a> { statements_as: false, statements, characteristics, - } - .into()) + }) } + /// Parse the period part of a trigger (`BEFORE`, `AFTER`, etc.). pub fn parse_trigger_period(&mut self) -> Result { Ok( match self.expect_one_of_keywords(&[ @@ -5740,11 +6199,14 @@ impl<'a> Parser<'a> { Keyword::INSTEAD => self .expect_keyword_is(Keyword::OF) .map(|_| TriggerPeriod::InsteadOf)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger period"), + )), }, ) } + /// Parse the event part of a trigger (`INSERT`, `UPDATE`, etc.). 
pub fn parse_trigger_event(&mut self) -> Result { Ok( match self.expect_one_of_keywords(&[ @@ -5764,11 +6226,14 @@ impl<'a> Parser<'a> { } Keyword::DELETE => TriggerEvent::Delete, Keyword::TRUNCATE => TriggerEvent::Truncate, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger event"), + )), }, ) } + /// Parse the `REFERENCING` clause of a trigger. pub fn parse_trigger_referencing(&mut self) -> Result, ParserError> { let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) { Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => { @@ -5791,6 +6256,7 @@ impl<'a> Parser<'a> { })) } + /// Parse the execution body of a trigger (`FUNCTION` or `PROCEDURE`). pub fn parse_trigger_exec_body(&mut self) -> Result { Ok(TriggerExecBody { exec_type: match self @@ -5798,12 +6264,15 @@ impl<'a> Parser<'a> { { Keyword::FUNCTION => TriggerExecBodyType::Function, Keyword::PROCEDURE => TriggerExecBodyType::Procedure, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger exec body"), + )), }, func_desc: self.parse_function_desc()?, }) } + /// Parse a `CREATE MACRO` statement. pub fn parse_create_macro( &mut self, or_replace: bool, @@ -5835,7 +6304,7 @@ impl<'a> Parser<'a> { }) } else { self.prev_token(); - self.expected("an object type after CREATE", self.peek_token()) + self.expected_ref("an object type after CREATE", self.peek_token_ref()) } } @@ -5851,10 +6320,11 @@ impl<'a> Parser<'a> { Ok(MacroArg { name, default_expr }) } + /// Parse a `CREATE EXTERNAL TABLE` statement. 
pub fn parse_create_external_table( &mut self, or_replace: bool, - ) -> Result { + ) -> Result { self.expect_keyword_is(Keyword::TABLE)?; let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name(false)?; @@ -5879,6 +6349,8 @@ impl<'a> Parser<'a> { let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; let table_options = if !table_properties.is_empty() { CreateTableOptions::TableProperties(table_properties) + } else if let Some(options) = self.maybe_parse_options(Keyword::OPTIONS)? { + CreateTableOptions::Options(options) } else { CreateTableOptions::None }; @@ -5896,6 +6368,41 @@ impl<'a> Parser<'a> { .build()) } + /// Parse `CREATE SNAPSHOT TABLE` statement. + /// + /// + pub fn parse_create_snapshot_table(&mut self) -> Result { + self.expect_keywords(&[Keyword::SNAPSHOT, Keyword::TABLE])?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name(true)?; + + self.expect_keyword_is(Keyword::CLONE)?; + let clone = Some(self.parse_object_name(true)?); + + let version = + if self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) + { + Some(TableVersion::ForSystemTimeAsOf(self.parse_expr()?)) + } else { + None + }; + + let table_options = if let Some(options) = self.maybe_parse_options(Keyword::OPTIONS)? { + CreateTableOptions::Options(options) + } else { + CreateTableOptions::None + }; + + Ok(CreateTableBuilder::new(table_name) + .snapshot(true) + .if_not_exists(if_not_exists) + .clone_clause(clone) + .version(version) + .table_options(table_options) + .build()) + } + + /// Parse a file format for external tables. pub fn parse_file_format(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -5921,6 +6428,7 @@ impl<'a> Parser<'a> { } } + /// Parse an `ANALYZE FORMAT`. 
pub fn parse_analyze_format(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -5928,19 +6436,21 @@ impl<'a> Parser<'a> { Keyword::TEXT => Ok(AnalyzeFormat::TEXT), Keyword::GRAPHVIZ => Ok(AnalyzeFormat::GRAPHVIZ), Keyword::JSON => Ok(AnalyzeFormat::JSON), + Keyword::TREE => Ok(AnalyzeFormat::TREE), _ => self.expected("fileformat", next_token), }, _ => self.expected("fileformat", next_token), } } + /// Parse a `CREATE VIEW` statement. pub fn parse_create_view( &mut self, or_alter: bool, or_replace: bool, temporary: bool, create_view_params: Option, - ) -> Result { + ) -> Result { let secure = self.parse_keyword(Keyword::SECURE); let materialized = self.parse_keyword(Keyword::MATERIALIZED); self.expect_keyword_is(Keyword::VIEW)?; @@ -5953,6 +6463,7 @@ impl<'a> Parser<'a> { let name_before_not_exists = !if_not_exists_first && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let if_not_exists = if_not_exists_first || name_before_not_exists; + let copy_grants = self.parse_keywords(&[Keyword::COPY, Keyword::GRANTS]); // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let columns = self.parse_view_columns()?; @@ -5985,7 +6496,7 @@ impl<'a> Parser<'a> { None }; - let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) + let comment = if self.dialect.supports_create_view_comment_syntax() && self.parse_keyword(Keyword::COMMENT) { self.expect_token(&Token::Eq)?; @@ -6020,11 +6531,11 @@ impl<'a> Parser<'a> { with_no_schema_binding, if_not_exists, temporary, + copy_grants, to, params: create_view_params, name_before_not_exists, - } - .into()) + }) } /// Parse optional parameters for the `CREATE VIEW` statement supported by [MySQL]. @@ -6085,7 +6596,8 @@ impl<'a> Parser<'a> { } } - pub fn parse_create_role(&mut self) -> Result { + /// Parse a `CREATE ROLE` statement. 
+ pub fn parse_create_role(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; @@ -6256,7 +6768,7 @@ impl<'a> Parser<'a> { Ok(()) } } else { - self.expected("ROLE or GROUP after IN", self.peek_token()) + self.expected_ref("ROLE or GROUP after IN", self.peek_token_ref()) } } Keyword::ROLE => { @@ -6306,16 +6818,18 @@ impl<'a> Parser<'a> { user, admin, authorization_owner, - } - .into()) + }) } + /// Parse an `OWNER` clause. pub fn parse_owner(&mut self) -> Result { let owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { Some(Keyword::CURRENT_USER) => Owner::CurrentUser, Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, Some(Keyword::SESSION_USER) => Owner::SessionUser, - Some(_) => unreachable!(), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in owner"), + )), None => { match self.parse_identifier() { Ok(ident) => Owner::Ident(ident), @@ -6329,7 +6843,7 @@ impl<'a> Parser<'a> { } /// Parses a [Statement::CreateDomain] statement. 
- fn parse_create_domain(&mut self) -> Result { + fn parse_create_domain(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword_is(Keyword::AS)?; let data_type = self.parse_data_type()?; @@ -6348,13 +6862,13 @@ impl<'a> Parser<'a> { constraints.push(constraint); } - Ok(Statement::CreateDomain(CreateDomain { + Ok(CreateDomain { name, data_type, collation, default, constraints, - })) + }) } /// ```sql @@ -6366,7 +6880,7 @@ impl<'a> Parser<'a> { /// ``` /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createpolicy.html) - pub fn parse_create_policy(&mut self) -> Result { + pub fn parse_create_policy(&mut self) -> Result { let name = self.parse_identifier()?; self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; @@ -6377,7 +6891,9 @@ impl<'a> Parser<'a> { Some(match keyword { Keyword::PERMISSIVE => CreatePolicyType::Permissive, Keyword::RESTRICTIVE => CreatePolicyType::Restrictive, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in policy type"), + )), }) } else { None @@ -6397,7 +6913,9 @@ impl<'a> Parser<'a> { Keyword::INSERT => CreatePolicyCommand::Insert, Keyword::UPDATE => CreatePolicyCommand::Update, Keyword::DELETE => CreatePolicyCommand::Delete, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in policy command"), + )), }) } else { None @@ -6447,7 +6965,7 @@ impl<'a> Parser<'a> { /// ``` /// /// [Hive Documentation](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362034#LanguageManualDDL-CreateDataConnectorCreateConnector) - pub fn parse_create_connector(&mut self) -> Result { + pub fn parse_create_connector(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = 
self.parse_identifier()?; @@ -6471,14 +6989,14 @@ impl<'a> Parser<'a> { _ => None, }; - Ok(Statement::CreateConnector(CreateConnector { + Ok(CreateConnector { name, if_not_exists, connector_type, url, comment, with_dcproperties, - })) + }) } /// Parse an operator name, which can contain special characters like +, -, <, >, = @@ -6502,7 +7020,7 @@ impl<'a> Parser<'a> { /// Parse a [Statement::CreateOperator] /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createoperator.html) - pub fn parse_create_operator(&mut self) -> Result { + pub fn parse_create_operator(&mut self) -> Result { let name = self.parse_operator_name()?; self.expect_token(&Token::LParen)?; @@ -6510,12 +7028,7 @@ impl<'a> Parser<'a> { let mut is_procedure = false; let mut left_arg: Option = None; let mut right_arg: Option = None; - let mut commutator: Option = None; - let mut negator: Option = None; - let mut restrict: Option = None; - let mut join: Option = None; - let mut hashes = false; - let mut merges = false; + let mut options: Vec = Vec::new(); loop { let keyword = self.expect_one_of_keywords(&[ @@ -6532,11 +7045,11 @@ impl<'a> Parser<'a> { ])?; match keyword { - Keyword::HASHES if !hashes => { - hashes = true; + Keyword::HASHES if !options.iter().any(|o| matches!(o, OperatorOption::Hashes)) => { + options.push(OperatorOption::Hashes); } - Keyword::MERGES if !merges => { - merges = true; + Keyword::MERGES if !options.iter().any(|o| matches!(o, OperatorOption::Merges)) => { + options.push(OperatorOption::Merges); } Keyword::FUNCTION | Keyword::PROCEDURE if function.is_none() => { self.expect_token(&Token::Eq)?; @@ -6551,33 +7064,49 @@ impl<'a> Parser<'a> { self.expect_token(&Token::Eq)?; right_arg = Some(self.parse_data_type()?); } - Keyword::COMMUTATOR if commutator.is_none() => { + Keyword::COMMUTATOR + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Commutator(_))) => + { self.expect_token(&Token::Eq)?; if self.parse_keyword(Keyword::OPERATOR) { 
self.expect_token(&Token::LParen)?; - commutator = Some(self.parse_operator_name()?); + let op = self.parse_operator_name()?; self.expect_token(&Token::RParen)?; + options.push(OperatorOption::Commutator(op)); } else { - commutator = Some(self.parse_operator_name()?); + options.push(OperatorOption::Commutator(self.parse_operator_name()?)); } } - Keyword::NEGATOR if negator.is_none() => { + Keyword::NEGATOR + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Negator(_))) => + { self.expect_token(&Token::Eq)?; if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; - negator = Some(self.parse_operator_name()?); + let op = self.parse_operator_name()?; self.expect_token(&Token::RParen)?; + options.push(OperatorOption::Negator(op)); } else { - negator = Some(self.parse_operator_name()?); + options.push(OperatorOption::Negator(self.parse_operator_name()?)); } } - Keyword::RESTRICT if restrict.is_none() => { + Keyword::RESTRICT + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Restrict(_))) => + { self.expect_token(&Token::Eq)?; - restrict = Some(self.parse_object_name(false)?); + options.push(OperatorOption::Restrict(Some( + self.parse_object_name(false)?, + ))); } - Keyword::JOIN if join.is_none() => { + Keyword::JOIN if !options.iter().any(|o| matches!(o, OperatorOption::Join(_))) => { self.expect_token(&Token::Eq)?; - join = Some(self.parse_object_name(false)?); + options.push(OperatorOption::Join(Some(self.parse_object_name(false)?))); } _ => { return Err(ParserError::ParserError(format!( @@ -6600,39 +7129,31 @@ impl<'a> Parser<'a> { ParserError::ParserError("CREATE OPERATOR requires FUNCTION parameter".to_string()) })?; - Ok(Statement::CreateOperator(CreateOperator { + Ok(CreateOperator { name, function, is_procedure, left_arg, right_arg, - commutator, - negator, - restrict, - join, - hashes, - merges, - })) + options, + }) } /// Parse a [Statement::CreateOperatorFamily] /// /// [PostgreSQL 
Documentation](https://www.postgresql.org/docs/current/sql-createopfamily.html) - pub fn parse_create_operator_family(&mut self) -> Result { + pub fn parse_create_operator_family(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword(Keyword::USING)?; let using = self.parse_identifier()?; - Ok(Statement::CreateOperatorFamily(CreateOperatorFamily { - name, - using, - })) + Ok(CreateOperatorFamily { name, using }) } /// Parse a [Statement::CreateOperatorClass] /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createopclass.html) - pub fn parse_create_operator_class(&mut self) -> Result { + pub fn parse_create_operator_class(&mut self) -> Result { let name = self.parse_object_name(false)?; let default = self.parse_keyword(Keyword::DEFAULT); self.expect_keywords(&[Keyword::FOR, Keyword::TYPE])?; @@ -6651,7 +7172,7 @@ impl<'a> Parser<'a> { let mut items = vec![]; loop { if self.parse_keyword(Keyword::OPERATOR) { - let strategy_number = self.parse_literal_uint()? as u32; + let strategy_number = self.parse_literal_uint()?; let operator_name = self.parse_operator_name()?; // Optional operator argument types @@ -6673,7 +7194,8 @@ impl<'a> Parser<'a> { let sort_family = self.parse_object_name(false)?; Some(OperatorPurpose::ForOrderBy { sort_family }) } else { - return self.expected("SEARCH or ORDER BY after FOR", self.peek_token()); + return self + .expected_ref("SEARCH or ORDER BY after FOR", self.peek_token_ref()); } } else { None @@ -6686,26 +7208,27 @@ impl<'a> Parser<'a> { purpose, }); } else if self.parse_keyword(Keyword::FUNCTION) { - let support_number = self.parse_literal_uint()? 
as u32; + let support_number = self.parse_literal_uint()?; // Optional operator types - let op_types = - if self.consume_token(&Token::LParen) && self.peek_token() != Token::RParen { - let mut types = vec![]; - loop { - types.push(self.parse_data_type()?); - if !self.consume_token(&Token::Comma) { - break; - } + let op_types = if self.consume_token(&Token::LParen) + && self.peek_token_ref().token != Token::RParen + { + let mut types = vec![]; + loop { + types.push(self.parse_data_type()?); + if !self.consume_token(&Token::Comma) { + break; } - self.expect_token(&Token::RParen)?; - Some(types) - } else if self.consume_token(&Token::LParen) { - self.expect_token(&Token::RParen)?; - Some(vec![]) - } else { - None - }; + } + self.expect_token(&Token::RParen)?; + Some(types) + } else if self.consume_token(&Token::LParen) { + self.expect_token(&Token::RParen)?; + Some(vec![]) + } else { + None + }; let function_name = self.parse_object_name(false)?; @@ -6713,7 +7236,7 @@ impl<'a> Parser<'a> { let argument_types = if self.consume_token(&Token::LParen) { let mut types = vec![]; loop { - if self.peek_token() == Token::RParen { + if self.peek_token_ref().token == Token::RParen { break; } types.push(self.parse_data_type()?); @@ -6746,16 +7269,17 @@ impl<'a> Parser<'a> { } } - Ok(Statement::CreateOperatorClass(CreateOperatorClass { + Ok(CreateOperatorClass { name, default, for_type, using, family, items, - })) + }) } + /// Parse a `DROP` statement. 
pub fn parse_drop(&mut self) -> Result { // MySQL dialect supports `TEMPORARY` let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) @@ -6788,19 +7312,19 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::STREAM) { ObjectType::Stream } else if self.parse_keyword(Keyword::FUNCTION) { - return self.parse_drop_function(); + return self.parse_drop_function().map(Into::into); } else if self.parse_keyword(Keyword::POLICY) { - return self.parse_drop_policy(); + return self.parse_drop_policy().map(Into::into); } else if self.parse_keyword(Keyword::CONNECTOR) { return self.parse_drop_connector(); } else if self.parse_keyword(Keyword::DOMAIN) { - return self.parse_drop_domain(); + return self.parse_drop_domain().map(Into::into); } else if self.parse_keyword(Keyword::PROCEDURE) { return self.parse_drop_procedure(); } else if self.parse_keyword(Keyword::SECRET) { return self.parse_drop_secret(temporary, persistent); } else if self.parse_keyword(Keyword::TRIGGER) { - return self.parse_drop_trigger(); + return self.parse_drop_trigger().map(Into::into); } else if self.parse_keyword(Keyword::EXTENSION) { return self.parse_drop_extension(); } else if self.parse_keyword(Keyword::OPERATOR) { @@ -6813,9 +7337,9 @@ impl<'a> Parser<'a> { self.parse_drop_operator() }; } else { - return self.expected( + return self.expected_ref( "CONNECTOR, DATABASE, EXTENSION, FUNCTION, INDEX, OPERATOR, POLICY, PROCEDURE, ROLE, SCHEMA, SECRET, SEQUENCE, STAGE, TABLE, TRIGGER, TYPE, VIEW, MATERIALIZED VIEW or USER after DROP", - self.peek_token(), + self.peek_token_ref(), ); }; // Many dialects support the non-standard `IF EXISTS` clause and allow @@ -6823,7 +7347,7 @@ impl<'a> Parser<'a> { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; - let loc = self.peek_token().span.start; + let loc = self.peek_token_ref().span.start; let cascade = 
self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); let purge = self.parse_keyword(Keyword::PURGE); @@ -6865,15 +7389,15 @@ impl<'a> Parser<'a> { /// DROP FUNCTION [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] /// [ CASCADE | RESTRICT ] /// ``` - fn parse_drop_function(&mut self) -> Result { + fn parse_drop_function(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?; let drop_behavior = self.parse_optional_drop_behavior(); - Ok(Statement::DropFunction(DropFunction { + Ok(DropFunction { if_exists, func_desc, drop_behavior, - })) + }) } /// ```sql @@ -6881,13 +7405,13 @@ impl<'a> Parser<'a> { /// ``` /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-droppolicy.html) - fn parse_drop_policy(&mut self) -> Result { + fn parse_drop_policy(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let name = self.parse_identifier()?; self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; let drop_behavior = self.parse_optional_drop_behavior(); - Ok(Statement::DropPolicy { + Ok(DropPolicy { if_exists, name, table_name, @@ -6908,15 +7432,15 @@ impl<'a> Parser<'a> { /// ```sql /// DROP DOMAIN [ IF EXISTS ] name [ CASCADE | RESTRICT ] /// ``` - fn parse_drop_domain(&mut self) -> Result { + fn parse_drop_domain(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let name = self.parse_object_name(false)?; let drop_behavior = self.parse_optional_drop_behavior(); - Ok(Statement::DropDomain(DropDomain { + Ok(DropDomain { if_exists, name, drop_behavior, - })) + }) } /// ```sql @@ -6969,7 +7493,7 @@ impl<'a> Parser<'a> { (true, false) => Some(true), (false, true) => Some(false), (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", 
self.peek_token())?, + _ => self.expected_ref("TEMPORARY or PERSISTENT", self.peek_token_ref())?, }; Ok(Statement::DropSecret { @@ -7028,7 +7552,9 @@ impl<'a> Parser<'a> { match keyword { Keyword::WITH => Some(true), Keyword::WITHOUT => Some(false), - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in cursor hold"), + )), } } None => None, @@ -7063,7 +7589,7 @@ impl<'a> Parser<'a> { pub fn parse_big_query_declare(&mut self) -> Result { let names = self.parse_comma_separated(Parser::parse_identifier)?; - let data_type = match self.peek_token().token { + let data_type = match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::DEFAULT => None, _ => Some(self.parse_data_type()?), }; @@ -7127,7 +7653,7 @@ impl<'a> Parser<'a> { let (declare_type, for_query, assigned_expr, data_type) = if self.parse_keyword(Keyword::CURSOR) { self.expect_keyword_is(Keyword::FOR)?; - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::SELECT => ( Some(DeclareType::Cursor), Some(self.parse_query()?), @@ -7142,7 +7668,7 @@ impl<'a> Parser<'a> { ), } } else if self.parse_keyword(Keyword::RESULTSET) { - let assigned_expr = if self.peek_token().token != Token::SemiColon { + let assigned_expr = if self.peek_token_ref().token != Token::SemiColon { self.parse_snowflake_variable_declaration_expression()? } else { // Nothing more to do. The statement has no further parameters. @@ -7151,7 +7677,7 @@ impl<'a> Parser<'a> { (Some(DeclareType::ResultSet), None, assigned_expr, None) } else if self.parse_keyword(Keyword::EXCEPTION) { - let assigned_expr = if self.peek_token().token == Token::LParen { + let assigned_expr = if self.peek_token_ref().token == Token::LParen { Some(DeclareAssignment::Expr(Box::new(self.parse_expr()?))) } else { // Nothing more to do. The statement has no further parameters. 
@@ -7165,7 +7691,7 @@ impl<'a> Parser<'a> { self.parse_snowflake_variable_declaration_expression()? { (Some(assigned_expr), None) - } else if let Token::Word(_) = self.peek_token().token { + } else if let Token::Word(_) = &self.peek_token_ref().token { let data_type = self.parse_data_type()?; ( self.parse_snowflake_variable_declaration_expression()?, @@ -7190,7 +7716,7 @@ impl<'a> Parser<'a> { stmts.push(stmt); if self.consume_token(&Token::SemiColon) { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) if ALL_KEYWORDS .binary_search(&w.value.to_uppercase().as_str()) @@ -7244,7 +7770,7 @@ impl<'a> Parser<'a> { let ident = self.parse_identifier()?; if !ident.value.starts_with('@') && !matches!( - self.peek_token().token, + &self.peek_token_ref().token, Token::Word(w) if w.keyword == Keyword::CURSOR ) { @@ -7256,7 +7782,7 @@ impl<'a> Parser<'a> { } }?; - let (declare_type, data_type) = match self.peek_token().token { + let (declare_type, data_type) = match &self.peek_token_ref().token { Token::Word(w) => match w.keyword { Keyword::CURSOR => { self.next_token(); @@ -7303,7 +7829,7 @@ impl<'a> Parser<'a> { pub fn parse_snowflake_variable_declaration_expression( &mut self, ) -> Result, ParserError> { - Ok(match self.peek_token().token { + Ok(match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::DEFAULT => { self.next_token(); // Skip `DEFAULT` Some(DeclareAssignment::Default(Box::new(self.parse_expr()?))) @@ -7327,7 +7853,7 @@ impl<'a> Parser<'a> { pub fn parse_mssql_variable_declaration_expression( &mut self, ) -> Result, ParserError> { - Ok(match self.peek_token().token { + Ok(match &self.peek_token_ref().token { Token::Eq => { self.next_token(); // Skip `=` Some(DeclareAssignment::MsSqlAssignment(Box::new( @@ -7338,7 +7864,7 @@ impl<'a> Parser<'a> { }) } - // FETCH [ direction { FROM | IN } ] cursor INTO target; + /// Parse `FETCH [direction] { FROM | IN } cursor INTO target;` statement. 
pub fn parse_fetch_statement(&mut self) -> Result { let direction = if self.parse_keyword(Keyword::NEXT) { FetchDirection::Next @@ -7350,11 +7876,11 @@ impl<'a> Parser<'a> { FetchDirection::Last } else if self.parse_keyword(Keyword::ABSOLUTE) { FetchDirection::Absolute { - limit: self.parse_number_value()?.value, + limit: self.parse_number_value()?, } } else if self.parse_keyword(Keyword::RELATIVE) { FetchDirection::Relative { - limit: self.parse_number_value()?.value, + limit: self.parse_number_value()?, } } else if self.parse_keyword(Keyword::FORWARD) { if self.parse_keyword(Keyword::ALL) { @@ -7362,7 +7888,7 @@ impl<'a> Parser<'a> { } else { FetchDirection::Forward { // TODO: Support optional - limit: Some(self.parse_number_value()?.value), + limit: Some(self.parse_number_value()?), } } } else if self.parse_keyword(Keyword::BACKWARD) { @@ -7371,14 +7897,14 @@ impl<'a> Parser<'a> { } else { FetchDirection::Backward { // TODO: Support optional - limit: Some(self.parse_number_value()?.value), + limit: Some(self.parse_number_value()?), } } } else if self.parse_keyword(Keyword::ALL) { FetchDirection::All } else { FetchDirection::Count { - limit: self.parse_number_value()?.value, + limit: self.parse_number_value()?, } }; @@ -7389,7 +7915,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::IN)?; FetchPosition::In } else { - return parser_err!("Expected FROM or IN", self.peek_token().span.start); + return parser_err!("Expected FROM or IN", self.peek_token_ref().span.start); }; let name = self.parse_identifier()?; @@ -7408,6 +7934,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `DISCARD` statement. 
pub fn parse_discard(&mut self) -> Result { let object_type = if self.parse_keyword(Keyword::ALL) { DiscardObject::ALL @@ -7418,15 +7945,16 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::TEMP) || self.parse_keyword(Keyword::TEMPORARY) { DiscardObject::TEMP } else { - return self.expected( + return self.expected_ref( "ALL, PLANS, SEQUENCES, TEMP or TEMPORARY after DISCARD", - self.peek_token(), + self.peek_token_ref(), ); }; Ok(Statement::Discard { object_type }) } - pub fn parse_create_index(&mut self, unique: bool) -> Result { + /// Parse a `CREATE INDEX` statement. + pub fn parse_create_index(&mut self, unique: bool) -> Result { let concurrently = self.parse_keyword(Keyword::CONCURRENTLY); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -7500,7 +8028,7 @@ impl<'a> Parser<'a> { alter_options.push(self.parse_alter_table_operation()?) } - Ok(Statement::CreateIndex(CreateIndex { + Ok(CreateIndex { name: index_name, table_name, using, @@ -7514,10 +8042,11 @@ impl<'a> Parser<'a> { predicate, index_options, alter_options, - })) + }) } - pub fn parse_create_extension(&mut self) -> Result { + /// Parse a `CREATE EXTENSION` statement. + pub fn parse_create_extension(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; @@ -7547,8 +8076,7 @@ impl<'a> Parser<'a> { schema, version, cascade, - } - .into()) + }) } /// Parse a PostgreSQL-specific [Statement::DropExtension] statement. 
@@ -7564,7 +8092,7 @@ impl<'a> Parser<'a> { .map(|k| match k { Keyword::CASCADE => Ok(ReferentialAction::Cascade), Keyword::RESTRICT => Ok(ReferentialAction::Restrict), - _ => self.expected("CASCADE or RESTRICT", self.peek_token()), + _ => self.expected_ref("CASCADE or RESTRICT", self.peek_token_ref()), }) .transpose()?, })) @@ -7644,11 +8172,14 @@ impl<'a> Parser<'a> { })) } - //TODO: Implement parsing for Skewed + /// Parse Hive distribution style. + /// + /// TODO: Support parsing for `SKEWED` distribution style. pub fn parse_hive_distribution(&mut self) -> Result { if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; + let columns = + self.parse_comma_separated(|parser| parser.parse_column_def_inner(true))?; self.expect_token(&Token::RParen)?; Ok(HiveDistributionStyle::PARTITIONED { columns }) } else { @@ -7656,6 +8187,24 @@ impl<'a> Parser<'a> { } } + /// Parse Redshift `DISTSTYLE { AUTO | EVEN | KEY | ALL }`. + /// + /// See + fn parse_dist_style(&mut self) -> Result { + let token = self.next_token(); + match &token.token { + Token::Word(w) => match w.keyword { + Keyword::AUTO => Ok(DistStyle::Auto), + Keyword::EVEN => Ok(DistStyle::Even), + Keyword::KEY => Ok(DistStyle::Key), + Keyword::ALL => Ok(DistStyle::All), + _ => self.expected("AUTO, EVEN, KEY, or ALL", token), + }, + _ => self.expected("AUTO, EVEN, KEY, or ALL", token), + } + } + + /// Parse Hive formats. pub fn parse_hive_formats(&mut self) -> Result, ParserError> { let mut hive_format: Option = None; loop { @@ -7711,6 +8260,7 @@ impl<'a> Parser<'a> { Ok(hive_format) } + /// Parse Hive row format. pub fn parse_row_format(&mut self) -> Result { self.expect_keyword_is(Keyword::FORMAT)?; match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { @@ -7815,17 +8365,34 @@ impl<'a> Parser<'a> { } } + /// Parse `CREATE TABLE` statement. 
pub fn parse_create_table( &mut self, or_replace: bool, temporary: bool, global: Option, transient: bool, - ) -> Result { + ) -> Result { let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name(allow_unquoted_hyphen)?; + // PostgreSQL PARTITION OF for child partition tables + // Note: This is a PostgreSQL-specific feature, but the dialect check was intentionally + // removed to allow GenericDialect and other dialects to parse this syntax. This enables + // multi-dialect SQL tools to work with PostgreSQL-specific DDL statements. + // + // PARTITION OF can be combined with other table definition clauses in the AST, + // though PostgreSQL itself prohibits PARTITION OF with AS SELECT or LIKE clauses. + // The parser accepts these combinations for flexibility; semantic validation + // is left to downstream tools. + // Child partitions can have their own constraints and indexes. + let partition_of = if self.parse_keywords(&[Keyword::PARTITION, Keyword::OF]) { + Some(self.parse_object_name(allow_unquoted_hyphen)?) + } else { + None + }; + // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs let on_cluster = self.parse_optional_on_cluster()?; @@ -7850,6 +8417,20 @@ impl<'a> Parser<'a> { None }; + // PostgreSQL PARTITION OF: partition bound specification + let for_values = if partition_of.is_some() { + if self.peek_keyword(Keyword::FOR) || self.peek_keyword(Keyword::DEFAULT) { + Some(self.parse_partition_for_values()?) 
+ } else { + return self.expected_ref( + "FOR VALUES or DEFAULT after PARTITION OF", + self.peek_token_ref(), + ); + } + } else { + None + }; + // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); @@ -7871,7 +8452,7 @@ impl<'a> Parser<'a> { let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { - let columns = if self.peek_token() != Token::RParen { + let columns = if self.peek_token_ref().token != Token::RParen { self.parse_comma_separated(|p| p.parse_expr())? } else { vec![] @@ -7893,6 +8474,37 @@ impl<'a> Parser<'a> { let strict = self.parse_keyword(Keyword::STRICT); + // Redshift: BACKUP YES|NO + let backup = if self.parse_keyword(Keyword::BACKUP) { + let keyword = self.expect_one_of_keywords(&[Keyword::YES, Keyword::NO])?; + Some(keyword == Keyword::YES) + } else { + None + }; + + // Redshift: DISTSTYLE, DISTKEY, SORTKEY + let diststyle = if self.parse_keyword(Keyword::DISTSTYLE) { + Some(self.parse_dist_style()?) + } else { + None + }; + let distkey = if self.parse_keyword(Keyword::DISTKEY) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + let sortkey = if self.parse_keyword(Keyword::SORTKEY) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Some(columns) + } else { + None + }; + // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(self.parse_query()?) 
@@ -7927,9 +8539,15 @@ impl<'a> Parser<'a> { .partition_by(create_table_config.partition_by) .cluster_by(create_table_config.cluster_by) .inherits(create_table_config.inherits) + .partition_of(partition_of) + .for_values(for_values) .table_options(create_table_config.table_options) .primary_key(primary_key) .strict(strict) + .backup(backup) + .diststyle(diststyle) + .distkey(distkey) + .sortkey(sortkey) .build()) } @@ -7981,11 +8599,74 @@ impl<'a> Parser<'a> { } else { parser_err!( "Expecting DELETE ROWS, PRESERVE ROWS or DROP", - self.peek_token() + self.peek_token_ref() ) } } + /// Parse [ForValues] of a `PARTITION OF` clause. + /// + /// Parses: `FOR VALUES partition_bound_spec | DEFAULT` + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createtable.html) + fn parse_partition_for_values(&mut self) -> Result { + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(ForValues::Default); + } + + self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?; + + if self.parse_keyword(Keyword::IN) { + // FOR VALUES IN (expr, ...) + self.expect_token(&Token::LParen)?; + if self.peek_token_ref().token == Token::RParen { + return self.expected_ref("at least one value", self.peek_token_ref()); + } + let values = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(ForValues::In(values)) + } else if self.parse_keyword(Keyword::FROM) { + // FOR VALUES FROM (...) TO (...) 
+ self.expect_token(&Token::LParen)?; + if self.peek_token_ref().token == Token::RParen { + return self.expected_ref("at least one value", self.peek_token_ref()); + } + let from = self.parse_comma_separated(Parser::parse_partition_bound_value)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::TO)?; + self.expect_token(&Token::LParen)?; + if self.peek_token_ref().token == Token::RParen { + return self.expected_ref("at least one value", self.peek_token_ref()); + } + let to = self.parse_comma_separated(Parser::parse_partition_bound_value)?; + self.expect_token(&Token::RParen)?; + Ok(ForValues::From { from, to }) + } else if self.parse_keyword(Keyword::WITH) { + // FOR VALUES WITH (MODULUS n, REMAINDER r) + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::MODULUS)?; + let modulus = self.parse_literal_uint()?; + self.expect_token(&Token::Comma)?; + self.expect_keyword(Keyword::REMAINDER)?; + let remainder = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Ok(ForValues::With { modulus, remainder }) + } else { + self.expected_ref("IN, FROM, or WITH after FOR VALUES", self.peek_token_ref()) + } + } + + /// Parse a single partition bound value (MINVALUE, MAXVALUE, or expression). + fn parse_partition_bound_value(&mut self) -> Result { + if self.parse_keyword(Keyword::MINVALUE) { + Ok(PartitionBoundValue::MinValue) + } else if self.parse_keyword(Keyword::MAXVALUE) { + Ok(PartitionBoundValue::MaxValue) + } else { + Ok(PartitionBoundValue::Expr(self.parse_expr()?)) + } + } + /// Parse configuration like inheritance, partitioning, clustering information during the table creation. 
/// /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) @@ -8028,7 +8709,7 @@ impl<'a> Parser<'a> { )); }; - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == Keyword::OPTIONS { table_options = CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?) @@ -8086,7 +8767,7 @@ impl<'a> Parser<'a> { let engine = match value.token { Token::Word(w) => { - let parameters = if self.peek_token() == Token::LParen { + let parameters = if self.peek_token_ref().token == Token::LParen { self.parse_parenthesized_identifiers()? } else { vec![] @@ -8244,6 +8925,7 @@ impl<'a> Parser<'a> { Ok(Some(SqlOption::KeyValue { key, value })) } + /// Parse plain options. pub fn parse_plain_options(&mut self) -> Result, ParserError> { let mut options = Vec::new(); @@ -8257,6 +8939,7 @@ impl<'a> Parser<'a> { Ok(options) } + /// Parse optional inline comment. pub fn parse_optional_inline_comment(&mut self) -> Result, ParserError> { let comment = if self.parse_keyword(Keyword::COMMENT) { let has_eq = self.consume_token(&Token::Eq); @@ -8272,6 +8955,7 @@ impl<'a> Parser<'a> { Ok(comment) } + /// Parse comment value. pub fn parse_comment_value(&mut self) -> Result { let next_token = self.next_token(); let value = match next_token.token { @@ -8282,6 +8966,7 @@ impl<'a> Parser<'a> { Ok(value) } + /// Parse optional procedure parameters. pub fn parse_optional_procedure_parameters( &mut self, ) -> Result>, ParserError> { @@ -8290,7 +8975,7 @@ impl<'a> Parser<'a> { return Ok(Some(params)); } loop { - if let Token::Word(_) = self.peek_token().token { + if let Token::Word(_) = &self.peek_token_ref().token { params.push(self.parse_procedure_param()?) 
} let comma = self.consume_token(&Token::Comma); @@ -8298,12 +8983,16 @@ impl<'a> Parser<'a> { // allow a trailing comma, even though it's not in standard break; } else if !comma { - return self.expected("',' or ')' after parameter definition", self.peek_token()); + return self.expected_ref( + "',' or ')' after parameter definition", + self.peek_token_ref(), + ); } } Ok(Some(params)) } + /// Parse columns and constraints. pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { let mut columns = vec![]; let mut constraints = vec![]; @@ -8314,17 +9003,21 @@ impl<'a> Parser<'a> { loop { if let Some(constraint) = self.parse_optional_table_constraint()? { constraints.push(constraint); - } else if let Token::Word(_) = self.peek_token().token { + } else if let Token::Word(_) = &self.peek_token_ref().token { columns.push(self.parse_column_def()?); } else { - return self.expected("column name or constraint definition", self.peek_token()); + return self.expected_ref( + "column name or constraint definition", + self.peek_token_ref(), + ); } let comma = self.consume_token(&Token::Comma); - let rparen = self.peek_token().token == Token::RParen; + let rparen = self.peek_token_ref().token == Token::RParen; if !comma && !rparen { - return self.expected("',' or ')' after column definition", self.peek_token()); + return self + .expected_ref("',' or ')' after column definition", self.peek_token_ref()); }; if rparen @@ -8340,6 +9033,7 @@ impl<'a> Parser<'a> { Ok((columns, constraints)) } + /// Parse procedure parameter. pub fn parse_procedure_param(&mut self) -> Result { let mode = if self.parse_keyword(Keyword::IN) { Some(ArgMode::In) @@ -8366,10 +9060,21 @@ impl<'a> Parser<'a> { }) } + /// Parse column definition. 
pub fn parse_column_def(&mut self) -> Result { + self.parse_column_def_inner(false) + } + + fn parse_column_def_inner( + &mut self, + optional_data_type: bool, + ) -> Result { let col_name = self.parse_identifier()?; let data_type = if self.is_column_type_sqlite_unspecified() { DataType::Unspecified + } else if optional_data_type { + self.maybe_parse(|parser| parser.parse_data_type())? + .unwrap_or(DataType::Unspecified) } else { self.parse_data_type()? }; @@ -8380,9 +9085,9 @@ impl<'a> Parser<'a> { if let Some(option) = self.parse_optional_column_option()? { options.push(ColumnOptionDef { name, option }); } else { - return self.expected( + return self.expected_ref( "constraint details after CONSTRAINT ", - self.peek_token(), + self.peek_token_ref(), ); } } else if let Some(option) = self.parse_optional_column_option()? { @@ -8400,7 +9105,7 @@ impl<'a> Parser<'a> { fn is_column_type_sqlite_unspecified(&mut self) -> bool { if dialect_of!(self is SQLiteDialect) { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(word) => matches!( word.keyword, Keyword::CONSTRAINT @@ -8421,6 +9126,7 @@ impl<'a> Parser<'a> { } } + /// Parse optional column option. pub fn parse_optional_column_option(&mut self) -> Result, ParserError> { if let Some(option) = self.dialect.parse_column_option(self)? 
{ return option; @@ -8450,30 +9156,24 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::NULL) { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { - Ok(Some(ColumnOption::Default( - self.parse_column_option_expr()?, - ))) + Ok(Some(ColumnOption::Default(self.parse_expr()?))) } else if dialect_of!(self is ClickHouseDialect| GenericDialect) && self.parse_keyword(Keyword::MATERIALIZED) { - Ok(Some(ColumnOption::Materialized( - self.parse_column_option_expr()?, - ))) + Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) } else if dialect_of!(self is ClickHouseDialect| GenericDialect) && self.parse_keyword(Keyword::ALIAS) { - Ok(Some(ColumnOption::Alias(self.parse_column_option_expr()?))) + Ok(Some(ColumnOption::Alias(self.parse_expr()?))) } else if dialect_of!(self is ClickHouseDialect| GenericDialect) && self.parse_keyword(Keyword::EPHEMERAL) { // The expression is optional for the EPHEMERAL syntax, so we need to check // if the column definition has remaining tokens before parsing the expression. 
- if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + if matches!(self.peek_token_ref().token, Token::Comma | Token::RParen) { Ok(Some(ColumnOption::Ephemeral(None))) } else { - Ok(Some(ColumnOption::Ephemeral(Some( - self.parse_column_option_expr()?, - )))) + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) } } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { let characteristics = self.parse_constraint_characteristics()?; @@ -8489,12 +9189,18 @@ impl<'a> Parser<'a> { .into(), )) } else if self.parse_keyword(Keyword::UNIQUE) { + let index_type_display = + if self.dialect.supports_key_column_option() && self.parse_keyword(Keyword::KEY) { + KeyOrIndexDisplay::Key + } else { + KeyOrIndexDisplay::None + }; let characteristics = self.parse_constraint_characteristics()?; Ok(Some( UniqueConstraint { name: None, index_name: None, - index_type_display: KeyOrIndexDisplay::None, + index_type_display, index_type: None, columns: vec![], index_options: vec![], @@ -8503,6 +9209,21 @@ impl<'a> Parser<'a> { } .into(), )) + } else if self.dialect.supports_key_column_option() && self.parse_keyword(Keyword::KEY) { + // In MySQL, `KEY` in a column definition is shorthand for `PRIMARY KEY`. 
+ // See: https://dev.mysql.com/doc/refman/8.4/en/create-table.html + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some( + PrimaryKeyConstraint { + name: None, + index_name: None, + index_type: None, + columns: vec![], + index_options: vec![], + characteristics, + } + .into(), + )) } else if self.parse_keyword(Keyword::REFERENCES) { let foreign_table = self.parse_object_name(false)?; // PostgreSQL allows omitting the column list and @@ -8547,11 +9268,20 @@ impl<'a> Parser<'a> { // since `CHECK` requires parentheses, we can parse the inner expression in ParserState::Normal let expr: Expr = self.with_state(ParserState::Normal, |p| p.parse_expr())?; self.expect_token(&Token::RParen)?; + + let enforced = if self.parse_keyword(Keyword::ENFORCED) { + Some(true) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::ENFORCED]) { + Some(false) + } else { + None + }; + Ok(Some( CheckConstraint { name: None, // Column-level check constraints don't have names expr: Box::new(expr), - enforced: None, // Could be extended later to support MySQL ENFORCED/NOT ENFORCED + enforced, } .into(), )) @@ -8586,7 +9316,7 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) && dialect_of!(self is MySqlDialect | GenericDialect) { - let expr = self.parse_column_option_expr()?; + let expr = self.parse_expr()?; Ok(Some(ColumnOption::OnUpdate(expr))) } else if self.parse_keyword(Keyword::GENERATED) { self.parse_optional_column_option_generated() @@ -8604,9 +9334,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SRID) && dialect_of!(self is MySqlDialect | GenericDialect) { - Ok(Some(ColumnOption::Srid(Box::new( - self.parse_column_option_expr()?, - )))) + Ok(Some(ColumnOption::Srid(Box::new(self.parse_expr()?)))) } else if self.parse_keyword(Keyword::IDENTITY) && dialect_of!(self is MsSqlDialect | GenericDialect) { @@ -8648,31 +9376,6 @@ impl<'a> Parser<'a> { } } - /// When parsing some column option expressions we need 
to revert to [ParserState::Normal] since - /// `NOT NULL` is allowed as an alias for `IS NOT NULL`. - /// In those cases we use this helper instead of calling [Parser::parse_expr] directly. - /// - /// For example, consider these `CREATE TABLE` statements: - /// ```sql - /// CREATE TABLE foo (abc BOOL DEFAULT (42 NOT NULL) NOT NULL); - /// ``` - /// vs - /// ```sql - /// CREATE TABLE foo (abc BOOL NOT NULL); - /// ``` - /// - /// In the first we should parse the inner portion of `(42 NOT NULL)` as [Expr::IsNotNull], - /// whereas is both statements that trailing `NOT NULL` should only be parsed as a - /// [ColumnOption::NotNull]. - fn parse_column_option_expr(&mut self) -> Result { - if self.peek_token_ref().token == Token::LParen { - let expr: Expr = self.with_state(ParserState::Normal, |p| p.parse_prefix())?; - Ok(expr) - } else { - Ok(self.parse_expr()?) - } - } - pub(crate) fn parse_tag(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_token(&Token::Eq)?; @@ -8726,7 +9429,7 @@ impl<'a> Parser<'a> { )) } else if dialect_of!(self is PostgreSqlDialect) { // Postgres' AS IDENTITY branches are above, this one needs STORED - self.expected("STORED", self.peek_token()) + self.expected_ref("STORED", self.peek_token_ref()) } else if self.parse_keywords(&[Keyword::VIRTUAL]) { Ok((GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual))) } else { @@ -8774,6 +9477,7 @@ impl<'a> Parser<'a> { })) } + /// Parse optional `CLUSTERED BY` clause for Hive/Generic dialects. pub fn parse_optional_clustered_by(&mut self) -> Result, ParserError> { let clustered_by = if dialect_of!(self is HiveDialect|GenericDialect) && self.parse_keywords(&[Keyword::CLUSTERED, Keyword::BY]) @@ -8803,6 +9507,9 @@ impl<'a> Parser<'a> { Ok(clustered_by) } + /// Parse a referential action used in foreign key clauses. + /// + /// Recognized forms: `RESTRICT`, `CASCADE`, `SET NULL`, `NO ACTION`, `SET DEFAULT`. 
pub fn parse_referential_action(&mut self) -> Result { if self.parse_keyword(Keyword::RESTRICT) { Ok(ReferentialAction::Restrict) @@ -8815,13 +9522,14 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { Ok(ReferentialAction::SetDefault) } else { - self.expected( + self.expected_ref( "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", - self.peek_token(), + self.peek_token_ref(), ) } } + /// Parse a `MATCH` kind for constraint references: `FULL`, `PARTIAL`, or `SIMPLE`. pub fn parse_match_kind(&mut self) -> Result { if self.parse_keyword(Keyword::FULL) { Ok(ConstraintReferenceMatchKind::Full) @@ -8830,10 +9538,26 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SIMPLE) { Ok(ConstraintReferenceMatchKind::Simple) } else { - self.expected("one of FULL, PARTIAL or SIMPLE", self.peek_token()) + self.expected_ref("one of FULL, PARTIAL or SIMPLE", self.peek_token_ref()) } } + /// Parse `index_name [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` + /// after `{ PRIMARY KEY | UNIQUE } USING INDEX`. + fn parse_constraint_using_index( + &mut self, + name: Option, + ) -> Result { + let index_name = self.parse_identifier()?; + let characteristics = self.parse_constraint_characteristics()?; + Ok(ConstraintUsingIndex { + name, + index_name, + characteristics, + }) + } + + /// Parse optional constraint characteristics such as `DEFERRABLE`, `INITIALLY` and `ENFORCED`. 
pub fn parse_constraint_characteristics( &mut self, ) -> Result, ParserError> { @@ -8851,7 +9575,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::IMMEDIATE) { cc.initially = Some(DeferrableInitial::Immediate); } else { - self.expected("one of DEFERRED or IMMEDIATE", self.peek_token())?; + self.expected_ref("one of DEFERRED or IMMEDIATE", self.peek_token_ref())?; } } else if cc.enforced.is_none() && self.parse_keyword(Keyword::ENFORCED) { cc.enforced = Some(true); @@ -8871,11 +9595,25 @@ impl<'a> Parser<'a> { } } + /// Parse an optional table constraint (e.g. `PRIMARY KEY`, `UNIQUE`, `FOREIGN KEY`, `CHECK`). pub fn parse_optional_table_constraint( &mut self, ) -> Result, ParserError> { let name = if self.parse_keyword(Keyword::CONSTRAINT) { - Some(self.parse_identifier()?) + if self.dialect.supports_constraint_keyword_without_name() + && self + .peek_one_of_keywords(&[ + Keyword::CHECK, + Keyword::PRIMARY, + Keyword::UNIQUE, + Keyword::FOREIGN, + ]) + .is_some() + { + None + } else { + Some(self.parse_identifier()?) 
+ } } else { None }; @@ -8883,12 +9621,22 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { Token::Word(w) if w.keyword == Keyword::UNIQUE => { + // PostgreSQL: UNIQUE USING INDEX index_name + // https://www.postgresql.org/docs/current/sql-altertable.html + if self.parse_keywords(&[Keyword::USING, Keyword::INDEX]) { + return Ok(Some(TableConstraint::UniqueUsingIndex( + self.parse_constraint_using_index(name)?, + ))); + } + let index_type_display = self.parse_index_type_display(); if !dialect_of!(self is GenericDialect | MySqlDialect) && !index_type_display.is_none() { - return self - .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); + return self.expected_ref( + "`index_name` or `(column_name [, ...])`", + self.peek_token_ref(), + ); } let nulls_distinct = self.parse_optional_nulls_distinct()?; @@ -8918,6 +9666,14 @@ impl<'a> Parser<'a> { // after `PRIMARY` always stay `KEY` self.expect_keyword_is(Keyword::KEY)?; + // PostgreSQL: PRIMARY KEY USING INDEX index_name + // https://www.postgresql.org/docs/current/sql-altertable.html + if self.parse_keywords(&[Keyword::USING, Keyword::INDEX]) { + return Ok(Some(TableConstraint::PrimaryKeyUsingIndex( + self.parse_constraint_using_index(name)?, + ))); + } + // optional index name let index_name = self.parse_optional_ident()?; let index_type = self.parse_optional_using_then_index_type()?; @@ -9009,7 +9765,7 @@ impl<'a> Parser<'a> { { let display_as_key = w.keyword == Keyword::KEY; - let name = match self.peek_token().token { + let name = match &self.peek_token_ref().token { Token::Word(word) if word.keyword == Keyword::USING => None, _ => self.parse_optional_ident()?, }; @@ -9086,11 +9842,12 @@ impl<'a> Parser<'a> { }) } + /// Optionally parse a parenthesized list of `SqlOption`s introduced by `keyword`. 
pub fn maybe_parse_options( &mut self, keyword: Keyword, ) -> Result>, ParserError> { - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == keyword { return Ok(Some(self.parse_options(keyword)?)); } @@ -9098,6 +9855,7 @@ impl<'a> Parser<'a> { Ok(None) } + /// Parse a parenthesized list of `SqlOption`s following `keyword`, or return an empty vec. pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; @@ -9109,6 +9867,7 @@ impl<'a> Parser<'a> { } } + /// Parse options introduced by one of `keywords` followed by a parenthesized list. pub fn parse_options_with_keywords( &mut self, keywords: &[Keyword], @@ -9123,6 +9882,7 @@ impl<'a> Parser<'a> { } } + /// Parse an index type token (e.g. `BTREE`, `HASH`, or a custom identifier). pub fn parse_index_type(&mut self) -> Result { Ok(if self.parse_keyword(Keyword::BTREE) { IndexType::BTree @@ -9148,6 +9908,7 @@ impl<'a> Parser<'a> { /// ```sql //// USING BTREE (name, age DESC) /// ``` + /// Optionally parse `USING ` and return the parsed `IndexType` if present. pub fn parse_optional_using_then_index_type( &mut self, ) -> Result, ParserError> { @@ -9160,11 +9921,13 @@ impl<'a> Parser<'a> { /// Parse `[ident]`, mostly `ident` is name, like: /// `window_name`, `index_name`, ... + /// Parse an optional identifier, returning `Some(Ident)` if present. pub fn parse_optional_ident(&mut self) -> Result, ParserError> { self.maybe_parse(|parser| parser.parse_identifier()) } #[must_use] + /// Parse optional `KEY` or `INDEX` display tokens used in index/constraint declarations. pub fn parse_index_type_display(&mut self) -> KeyOrIndexDisplay { if self.parse_keyword(Keyword::KEY) { KeyOrIndexDisplay::Key @@ -9175,6 +9938,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional index option such as `USING ` or `COMMENT `. 
pub fn parse_optional_index_option(&mut self) -> Result, ParserError> { if let Some(index_type) = self.parse_optional_using_then_index_type()? { Ok(Some(IndexOption::Using(index_type))) @@ -9186,6 +9950,7 @@ impl<'a> Parser<'a> { } } + /// Parse zero or more index options and return them as a vector. pub fn parse_index_options(&mut self) -> Result, ParserError> { let mut options = Vec::new(); @@ -9197,10 +9962,11 @@ impl<'a> Parser<'a> { } } + /// Parse a single `SqlOption` used by various dialect-specific DDL statements. pub fn parse_sql_option(&mut self) -> Result { let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect); - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => { Ok(SqlOption::Ident(self.parse_identifier()?)) } @@ -9220,6 +9986,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CLUSTERED` table option (MSSQL-specific syntaxes supported). pub fn parse_option_clustered(&mut self) -> Result { if self.parse_keywords(&[ Keyword::CLUSTERED, @@ -9256,6 +10023,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `PARTITION(...) FOR VALUES(...)` table option. pub fn parse_option_partition(&mut self) -> Result { self.expect_keyword_is(Keyword::PARTITION)?; self.expect_token(&Token::LParen)?; @@ -9285,6 +10053,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a parenthesized list of partition expressions and return a `Partition` value. pub fn parse_partition(&mut self) -> Result { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; @@ -9292,6 +10061,7 @@ impl<'a> Parser<'a> { Ok(Partition::Partitions(partitions)) } + /// Parse a parenthesized `SELECT` projection used for projection-based operations. pub fn parse_projection_select(&mut self) -> Result { self.expect_token(&Token::LParen)?; self.expect_keyword_is(Keyword::SELECT)?; @@ -9305,6 +10075,7 @@ impl<'a> Parser<'a> { order_by, }) } + /// Parse `ALTER TABLE ... 
ADD PROJECTION ...` operation. pub fn parse_alter_table_add_projection(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; @@ -9316,6 +10087,19 @@ impl<'a> Parser<'a> { }) } + /// Parse Redshift `ALTER SORTKEY (column_list)`. + /// + /// See + fn parse_alter_sort_key(&mut self) -> Result { + self.expect_keyword_is(Keyword::ALTER)?; + self.expect_keyword_is(Keyword::SORTKEY)?; + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Ok(AlterTableOperation::AlterSortKey { columns }) + } + + /// Parse a single `ALTER TABLE` operation and return an `AlterTableOperation`. pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { if let Some(constraint) = self.parse_optional_table_constraint()? { @@ -9403,9 +10187,9 @@ impl<'a> Parser<'a> { let name = self.parse_identifier()?; AlterTableOperation::DisableTrigger { name } } else { - return self.expected( + return self.expected_ref( "ROW LEVEL SECURITY, RULE, or TRIGGER after DISABLE", - self.peek_token(), + self.peek_token_ref(), ); } } else if self.parse_keyword(Keyword::ENABLE) { @@ -9430,11 +10214,26 @@ impl<'a> Parser<'a> { let name = self.parse_identifier()?; AlterTableOperation::EnableTrigger { name } } else { - return self.expected( + return self.expected_ref( "ALWAYS, REPLICA, ROW LEVEL SECURITY, RULE, or TRIGGER after ENABLE", - self.peek_token(), + self.peek_token_ref(), ); } + } else if self.parse_keywords(&[ + Keyword::FORCE, + Keyword::ROW, + Keyword::LEVEL, + Keyword::SECURITY, + ]) { + AlterTableOperation::ForceRowLevelSecurity + } else if self.parse_keywords(&[ + Keyword::NO, + Keyword::FORCE, + Keyword::ROW, + Keyword::LEVEL, + Keyword::SECURITY, + ]) { + AlterTableOperation::NoForceRowLevelSecurity } else if self.parse_keywords(&[Keyword::CLEAR, Keyword::PROJECTION]) && 
dialect_of!(self is ClickHouseDialect|GenericDialect) { @@ -9578,6 +10377,11 @@ impl<'a> Parser<'a> { column_position, } } else if self.parse_keyword(Keyword::ALTER) { + if self.peek_keyword(Keyword::SORTKEY) { + self.prev_token(); + return self.parse_alter_sort_key(); + } + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] let column_name = self.parse_identifier()?; let is_postgresql = dialect_of!(self is PostgreSqlDialect); @@ -9613,7 +10417,7 @@ impl<'a> Parser<'a> { let mut sequence_options: Option> = None; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { self.expect_token(&Token::LParen)?; sequence_options = Some(self.parse_create_sequence_options()?); self.expect_token(&Token::RParen)?; @@ -9630,7 +10434,7 @@ impl<'a> Parser<'a> { "SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN" }; - return self.expected(message, self.peek_token()); + return self.expected_ref(message, self.peek_token_ref()); }; AlterTableOperation::AlterColumn { column_name, op } } else if self.parse_keyword(Keyword::SWAP) { @@ -9703,9 +10507,9 @@ impl<'a> Parser<'a> { Some(Keyword::EXCLUSIVE) => AlterTableLock::Exclusive, Some(Keyword::NONE) => AlterTableLock::None, Some(Keyword::SHARED) => AlterTableLock::Shared, - _ => self.expected( + _ => self.expected_ref( "DEFAULT, EXCLUSIVE, NONE or SHARED after LOCK [=]", - self.peek_token(), + self.peek_token_ref(), )?, }; AlterTableOperation::Lock { equals, lock } @@ -9721,9 +10525,9 @@ impl<'a> Parser<'a> { Some(Keyword::INSTANT) => AlterTableAlgorithm::Instant, Some(Keyword::INPLACE) => AlterTableAlgorithm::Inplace, Some(Keyword::COPY) => AlterTableAlgorithm::Copy, - _ => self.expected( + _ => self.expected_ref( "DEFAULT, INSTANT, INPLACE, or COPY after ALGORITHM [=]", - self.peek_token(), + self.peek_token_ref(), )?, }; AlterTableOperation::Algorithm { equals, algorithm } @@ -9732,8 +10536,8 @@ impl<'a> Parser<'a> { let value = self.parse_number_value()?; 
AlterTableOperation::AutoIncrement { equals, value } } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::IDENTITY]) { - let identity = if self.parse_keyword(Keyword::NONE) { - ReplicaIdentity::None + let identity = if self.parse_keyword(Keyword::NOTHING) { + ReplicaIdentity::Nothing } else if self.parse_keyword(Keyword::FULL) { ReplicaIdentity::Full } else if self.parse_keyword(Keyword::DEFAULT) { @@ -9741,9 +10545,9 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::USING, Keyword::INDEX]) { ReplicaIdentity::Index(self.parse_identifier()?) } else { - return self.expected( - "NONE, FULL, DEFAULT, or USING INDEX index_name after REPLICA IDENTITY", - self.peek_token(), + return self.expected_ref( + "NOTHING, FULL, DEFAULT, or USING INDEX index_name after REPLICA IDENTITY", + self.peek_token_ref(), ); }; @@ -9763,9 +10567,9 @@ impl<'a> Parser<'a> { if !options.is_empty() { AlterTableOperation::SetOptionsParens { options } } else { - return self.expected( + return self.expected_ref( "ADD, RENAME, PARTITION, SWAP, DROP, REPLICA IDENTITY, SET, or SET TBLPROPERTIES after ALTER TABLE", - self.peek_token(), + self.peek_token_ref(), ); } } @@ -9795,10 +10599,13 @@ impl<'a> Parser<'a> { Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), // unreachable because expect_one_of_keywords used above - _ => unreachable!(), + unexpected_keyword => Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{PART, PARTITION}}, got {unexpected_keyword:?}"), + )), } } + /// Parse an `ALTER ` statement and dispatch to the appropriate alter handler. 
pub fn parse_alter(&mut self) -> Result { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, @@ -9811,6 +10618,7 @@ impl<'a> Parser<'a> { Keyword::ICEBERG, Keyword::SCHEMA, Keyword::USER, + Keyword::OPERATOR, ])?; match object_type { Keyword::SCHEMA => { @@ -9832,10 +10640,10 @@ impl<'a> Parser<'a> { let index_name = self.parse_object_name(false)?; AlterIndexOperation::RenameIndex { index_name } } else { - return self.expected("TO after RENAME", self.peek_token()); + return self.expected_ref("TO after RENAME", self.peek_token_ref()); } } else { - return self.expected("RENAME after ALTER INDEX", self.peek_token()); + return self.expected_ref("RENAME after ALTER INDEX", self.peek_token_ref()); }; Ok(Statement::AlterIndex { @@ -9843,12 +10651,23 @@ impl<'a> Parser<'a> { operation, }) } + Keyword::OPERATOR => { + if self.parse_keyword(Keyword::FAMILY) { + self.parse_alter_operator_family().map(Into::into) + } else if self.parse_keyword(Keyword::CLASS) { + self.parse_alter_operator_class().map(Into::into) + } else { + self.parse_alter_operator().map(Into::into) + } + } Keyword::ROLE => self.parse_alter_role(), - Keyword::POLICY => self.parse_alter_policy(), + Keyword::POLICY => self.parse_alter_policy().map(Into::into), Keyword::CONNECTOR => self.parse_alter_connector(), - Keyword::USER => self.parse_alter_user(), + Keyword::USER => self.parse_alter_user().map(Into::into), // unreachable because expect_one_of_keywords used above - _ => unreachable!(), + unexpected_keyword => Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{VIEW, TYPE, TABLE, INDEX, ROLE, POLICY, CONNECTOR, ICEBERG, SCHEMA, USER, OPERATOR}}, got {unexpected_keyword:?}"), + )), } } @@ -9896,6 +10715,7 @@ impl<'a> Parser<'a> { .into()) } + /// Parse an `ALTER VIEW` statement. 
pub fn parse_alter_view(&mut self) -> Result { let name = self.parse_object_name(false)?; let columns = self.parse_parenthesized_column_list(Optional, false)?; @@ -9962,15 +10782,322 @@ impl<'a> Parser<'a> { } } - // Parse a [Statement::AlterSchema] - // ALTER SCHEMA [ IF EXISTS ] schema_name - pub fn parse_alter_schema(&mut self) -> Result { - self.expect_keywords(&[Keyword::ALTER, Keyword::SCHEMA])?; - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_object_name(false)?; - let operation = if self.parse_keywords(&[Keyword::SET, Keyword::OPTIONS]) { - self.prev_token(); - let options = self.parse_options(Keyword::OPTIONS)?; + /// Parse a [Statement::AlterOperator] + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-alteroperator.html) + pub fn parse_alter_operator(&mut self) -> Result { + let name = self.parse_operator_name()?; + + // Parse (left_type, right_type) + self.expect_token(&Token::LParen)?; + + let left_type = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_data_type()?) + }; + + self.expect_token(&Token::Comma)?; + let right_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + // Parse the operation + let operation = if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = if self.parse_keyword(Keyword::CURRENT_ROLE) { + Owner::CurrentRole + } else if self.parse_keyword(Keyword::CURRENT_USER) { + Owner::CurrentUser + } else if self.parse_keyword(Keyword::SESSION_USER) { + Owner::SessionUser + } else { + Owner::Ident(self.parse_identifier()?) 
+ }; + AlterOperatorOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorOperation::SetSchema { schema_name } + } else if self.parse_keyword(Keyword::SET) { + self.expect_token(&Token::LParen)?; + + let mut options = Vec::new(); + loop { + let keyword = self.expect_one_of_keywords(&[ + Keyword::RESTRICT, + Keyword::JOIN, + Keyword::COMMUTATOR, + Keyword::NEGATOR, + Keyword::HASHES, + Keyword::MERGES, + ])?; + + match keyword { + Keyword::RESTRICT => { + self.expect_token(&Token::Eq)?; + let proc_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_object_name(false)?) + }; + options.push(OperatorOption::Restrict(proc_name)); + } + Keyword::JOIN => { + self.expect_token(&Token::Eq)?; + let proc_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_object_name(false)?) + }; + options.push(OperatorOption::Join(proc_name)); + } + Keyword::COMMUTATOR => { + self.expect_token(&Token::Eq)?; + let op_name = self.parse_operator_name()?; + options.push(OperatorOption::Commutator(op_name)); + } + Keyword::NEGATOR => { + self.expect_token(&Token::Eq)?; + let op_name = self.parse_operator_name()?; + options.push(OperatorOption::Negator(op_name)); + } + Keyword::HASHES => { + options.push(OperatorOption::Hashes); + } + Keyword::MERGES => { + options.push(OperatorOption::Merges); + } + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in operator option"), + )), + } + + if !self.consume_token(&Token::Comma) { + break; + } + } + + self.expect_token(&Token::RParen)?; + AlterOperatorOperation::Set { options } + } else { + return self.expected_ref( + "OWNER TO, SET SCHEMA, or SET after ALTER OPERATOR", + self.peek_token_ref(), + ); + }; + + Ok(AlterOperator { + name, + left_type, + right_type, + operation, + }) + } + + /// Parse an 
operator item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_operator(&mut self) -> Result { + let strategy_number = self.parse_literal_uint()?; + let operator_name = self.parse_operator_name()?; + + // Operator argument types (required for ALTER OPERATOR FAMILY) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + // Optional purpose + let purpose = if self.parse_keyword(Keyword::FOR) { + if self.parse_keyword(Keyword::SEARCH) { + Some(OperatorPurpose::ForSearch) + } else if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let sort_family = self.parse_object_name(false)?; + Some(OperatorPurpose::ForOrderBy { sort_family }) + } else { + return self.expected_ref("SEARCH or ORDER BY after FOR", self.peek_token_ref()); + } + } else { + None + }; + + Ok(OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_function(&mut self) -> Result { + let support_number = self.parse_literal_uint()?; + + // Optional operator types: consume `(` exactly once, then accept an empty `()` or a type list + let op_types = if self.consume_token(&Token::LParen) { + if self.consume_token(&Token::RParen) { + Some(vec![]) + } else { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + Some(types) + } + } else { + None + }; + + let function_name = self.parse_object_name(false)?; + + // Function argument types + let argument_types = if self.consume_token(&Token::LParen) { + if self.peek_token_ref().token == Token::RParen { + self.expect_token(&Token::RParen)?; + vec![] + } else { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + types + } + } else { + vec![] + }; + + Ok(OperatorFamilyItem::Function { + 
support_number, + op_types, + function_name, + argument_types, + }) + } + + /// Parse an operator item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_operator( + &mut self, + ) -> Result { + let strategy_number = self.parse_literal_uint()?; + + // Operator argument types (required for DROP) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_function( + &mut self, + ) -> Result { + let support_number = self.parse_literal_uint()?; + + // Operator types (required for DROP) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Function { + support_number, + op_types, + }) + } + + /// Parse an operator family item for ADD operations (dispatches to operator or function parsing) + fn parse_operator_family_add_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_add_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_add_function() + } else { + self.expected_ref("OPERATOR or FUNCTION", self.peek_token_ref()) + } + } + + /// Parse an operator family item for DROP operations (dispatches to operator or function parsing) + fn parse_operator_family_drop_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_drop_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_drop_function() + } else { + self.expected_ref("OPERATOR or FUNCTION", self.peek_token_ref()) + } + } + + /// Parse a [Statement::AlterOperatorFamily] + /// See + pub fn parse_alter_operator_family(&mut self) -> Result { 
+ let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + let operation = if self.parse_keyword(Keyword::ADD) { + let items = self.parse_comma_separated(Parser::parse_operator_family_add_item)?; + AlterOperatorFamilyOperation::Add { items } + } else if self.parse_keyword(Keyword::DROP) { + let items = self.parse_comma_separated(Parser::parse_operator_family_drop_item)?; + AlterOperatorFamilyOperation::Drop { items } + } else if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { + let new_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::RenameTo { new_name } + } else if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = self.parse_owner()?; + AlterOperatorFamilyOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::SetSchema { schema_name } + } else { + return self.expected_ref( + "ADD, DROP, RENAME TO, OWNER TO, or SET SCHEMA after ALTER OPERATOR FAMILY", + self.peek_token_ref(), + ); + }; + + Ok(AlterOperatorFamily { + name, + using, + operation, + }) + } + + /// Parse an `ALTER OPERATOR CLASS` statement. + /// + /// Handles operations like `RENAME TO`, `OWNER TO`, and `SET SCHEMA`. 
+ pub fn parse_alter_operator_class(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + let operation = if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { + let new_name = self.parse_object_name(false)?; + AlterOperatorClassOperation::RenameTo { new_name } + } else if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = self.parse_owner()?; + AlterOperatorClassOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorClassOperation::SetSchema { schema_name } + } else { + return self.expected_ref( + "RENAME TO, OWNER TO, or SET SCHEMA after ALTER OPERATOR CLASS", + self.peek_token_ref(), + ); + }; + + Ok(AlterOperatorClass { + name, + using, + operation, + }) + } + + /// Parse an `ALTER SCHEMA` statement. + /// + /// Supports operations such as setting options, renaming, adding/dropping replicas, and changing owner. + pub fn parse_alter_schema(&mut self) -> Result { + self.expect_keywords(&[Keyword::ALTER, Keyword::SCHEMA])?; + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + let operation = if self.parse_keywords(&[Keyword::SET, Keyword::OPTIONS]) { + self.prev_token(); + let options = self.parse_options(Keyword::OPTIONS)?; AlterSchemaOperation::SetOptionsParens { options } } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT, Keyword::COLLATE]) { let collate = self.parse_expr()?; @@ -10006,12 +11133,12 @@ impl<'a> Parser<'a> { /// or `CALL procedure_name` statement pub fn parse_call(&mut self) -> Result { let object_name = self.parse_object_name(false)?; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { match self.parse_function(object_name)? 
{ Expr::Function(f) => Ok(Statement::Call(f)), other => parser_err!( format!("Expected a simple procedure call but found: {other}"), - self.peek_token().span.start + self.peek_token_ref().span.start ), } } else { @@ -10045,7 +11172,7 @@ impl<'a> Parser<'a> { let to = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::TO]) { Some(Keyword::FROM) => false, Some(Keyword::TO) => true, - _ => self.expected("FROM or TO", self.peek_token())?, + _ => self.expected_ref("FROM or TO", self.peek_token_ref())?, }; if !to { // Use a separate if statement to prevent Rust compiler from complaining about @@ -10079,12 +11206,13 @@ impl<'a> Parser<'a> { while let Some(opt) = self.maybe_parse(|parser| parser.parse_copy_legacy_option())? { legacy_options.push(opt); } - let values = if let CopyTarget::Stdin = target { - self.expect_token(&Token::SemiColon)?; - self.parse_tsv() - } else { - vec![] - }; + let values = + if matches!(target, CopyTarget::Stdin) && self.peek_token_ref().token != Token::EOF { + self.expect_token(&Token::SemiColon)?; + self.parse_tsv() + } else { + vec![] + }; Ok(Statement::Copy { source, to, @@ -10103,6 +11231,7 @@ impl<'a> Parser<'a> { })) } + /// Parse a `CLOSE` cursor statement. pub fn parse_close(&mut self) -> Result { let cursor = if self.parse_keyword(Keyword::ALL) { CloseCursor::All @@ -10152,7 +11281,7 @@ impl<'a> Parser<'a> { CopyOption::ForceNull(self.parse_parenthesized_column_list(Mandatory, false)?) 
} Some(Keyword::ENCODING) => CopyOption::Encoding(self.parse_literal_string()?), - _ => self.expected("option", self.peek_token())?, + _ => self.expected_ref("option", self.peek_token_ref())?, }; Ok(ret) } @@ -10173,6 +11302,7 @@ impl<'a> Parser<'a> { Keyword::BZIP2, Keyword::CLEANPATH, Keyword::COMPUPDATE, + Keyword::CREDENTIALS, Keyword::CSV, Keyword::DATEFORMAT, Keyword::DELIMITER, @@ -10203,7 +11333,7 @@ impl<'a> Parser<'a> { Some(Keyword::ACCEPTANYDATE) => CopyLegacyOption::AcceptAnyDate, Some(Keyword::ACCEPTINVCHARS) => { let _ = self.parse_keyword(Keyword::AS); // [ AS ] - let ch = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + let ch = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { Some(self.parse_literal_string()?) } else { None @@ -10230,6 +11360,9 @@ impl<'a> Parser<'a> { }; CopyLegacyOption::CompUpdate { preset, enabled } } + Some(Keyword::CREDENTIALS) => { + CopyLegacyOption::Credentials(self.parse_literal_string()?) + } Some(Keyword::CSV) => CopyLegacyOption::Csv({ let mut opts = vec![]; while let Some(opt) = @@ -10241,7 +11374,7 @@ impl<'a> Parser<'a> { }), Some(Keyword::DATEFORMAT) => { let _ = self.parse_keyword(Keyword::AS); - let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + let fmt = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { Some(self.parse_literal_string()?) } else { None @@ -10274,14 +11407,22 @@ impl<'a> Parser<'a> { let num_rows = self.parse_literal_uint()?; CopyLegacyOption::IgnoreHeader(num_rows) } - Some(Keyword::JSON) => CopyLegacyOption::Json, + Some(Keyword::JSON) => { + let _ = self.parse_keyword(Keyword::AS); + let fmt = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { + Some(self.parse_literal_string()?) 
+ } else { + None + }; + CopyLegacyOption::Json(fmt) + } Some(Keyword::MANIFEST) => { let verbose = self.parse_keyword(Keyword::VERBOSE); CopyLegacyOption::Manifest { verbose } } Some(Keyword::MAXFILESIZE) => { let _ = self.parse_keyword(Keyword::AS); - let size = self.parse_number_value()?.value; + let size = self.parse_number_value()?; let unit = match self.parse_one_of_keywords(&[Keyword::MB, Keyword::GB]) { Some(Keyword::MB) => Some(FileSizeUnit::MB), Some(Keyword::GB) => Some(FileSizeUnit::GB), @@ -10339,7 +11480,7 @@ impl<'a> Parser<'a> { } Some(Keyword::TIMEFORMAT) => { let _ = self.parse_keyword(Keyword::AS); - let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + let fmt = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { Some(self.parse_literal_string()?) } else { None @@ -10348,13 +11489,13 @@ impl<'a> Parser<'a> { } Some(Keyword::TRUNCATECOLUMNS) => CopyLegacyOption::TruncateColumns, Some(Keyword::ZSTD) => CopyLegacyOption::Zstd, - _ => self.expected("option", self.peek_token())?, + _ => self.expected_ref("option", self.peek_token_ref())?, }; Ok(ret) } fn parse_file_size(&mut self) -> Result { - let size = self.parse_number_value()?.value; + let size = self.parse_number_value()?; let unit = self.maybe_parse_file_size_unit(); Ok(FileSize { size, unit }) } @@ -10402,7 +11543,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated(|p| p.parse_identifier())?, ) } - _ => self.expected("csv option", self.peek_token())?, + _ => self.expected_ref("csv option", self.peek_token_ref())?, }; Ok(ret) } @@ -10425,18 +11566,17 @@ impl<'a> Parser<'a> { self.parse_tab_value() } + /// Parse a single tab-separated value row used by `COPY` payload parsing. 
pub fn parse_tab_value(&mut self) -> Vec> { let mut values = vec![]; - let mut content = String::from(""); + let mut content = String::new(); while let Some(t) = self.next_token_no_skip().map(|t| &t.token) { match t { Token::Whitespace(Whitespace::Tab) => { - values.push(Some(content.to_string())); - content.clear(); + values.push(Some(core::mem::take(&mut content))); } Token::Whitespace(Whitespace::Newline) => { - values.push(Some(content.to_string())); - content.clear(); + values.push(Some(core::mem::take(&mut content))); } Token::Backslash => { if self.consume_token(&Token::Period) { @@ -10533,6 +11673,12 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } + Token::QuoteDelimitedStringLiteral(v) => { + ok_value(Value::QuoteDelimitedStringLiteral(v)) + } + Token::NationalQuoteDelimitedStringLiteral(v) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(v)) + } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) } @@ -10555,7 +11701,7 @@ impl<'a> Parser<'a> { Token::Number(w, false) => Ok(Ident::with_span(next_token.span, w)), _ => self.expected("placeholder", next_token), }?; - Ok(Value::Placeholder(tok.to_string() + &ident.value) + Ok(Value::Placeholder(format!("{tok}{}", ident.value)) .with_span(Span::new(span.start, ident.span.end))) } unexpected => self.expected( @@ -10573,10 +11719,37 @@ impl<'a> Parser<'a> { while let Token::SingleQuotedString(ref s) | Token::DoubleQuotedString(ref s) = self.peek_token_ref().token { - str.push_str(s.clone().as_str()); + str.push_str(s); self.advance_token(); } + } else if self + .dialect + .supports_string_literal_concatenation_with_newline() + { + // We are iterating over tokens including whitespaces, to identify + // string literals separated by newlines so we can concatenate them. 
+ let mut after_newline = false; + loop { + match self.peek_token_no_skip().token { + Token::Whitespace(Whitespace::Newline) => { + after_newline = true; + self.next_token_no_skip(); + } + Token::Whitespace(_) => { + self.next_token_no_skip(); + } + Token::SingleQuotedString(ref s) | Token::DoubleQuotedString(ref s) + if after_newline => + { + str.push_str(s); + self.next_token_no_skip(); + after_newline = false; + } + _ => break, + } + } } + str } @@ -10588,7 +11761,7 @@ impl<'a> Parser<'a> { Value::Placeholder(_) => Ok(value_wrapper), _ => { self.prev_token(); - self.expected("literal number", self.peek_token()) + self.expected_ref("literal number", self.peek_token_ref()) } } } @@ -10697,7 +11870,7 @@ impl<'a> Parser<'a> { match self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { Some(Keyword::TRUE) => Ok(true), Some(Keyword::FALSE) => Ok(false), - _ => self.expected("TRUE or FALSE", self.peek_token()), + _ => self.expected_ref("TRUE or FALSE", self.peek_token_ref()), } } @@ -10715,7 +11888,7 @@ impl<'a> Parser<'a> { Some(Keyword::NFD) => Ok(NormalizationForm::NFD), Some(Keyword::NFKC) => Ok(NormalizationForm::NFKC), Some(Keyword::NFKD) => Ok(NormalizationForm::NFKD), - _ => parser.expected("unicode normalization form", parser.peek_token()), + _ => parser.expected_ref("unicode normalization form", parser.peek_token_ref()), } })?; if self.parse_keyword(Keyword::NORMALIZED) { @@ -10725,9 +11898,10 @@ impl<'a> Parser<'a> { negated: neg, }); } - self.expected("unicode normalization form", self.peek_token()) + self.expected_ref("unicode normalization form", self.peek_token_ref()) } + /// Parse parenthesized enum members, used with `ENUM(...)` type definitions. 
pub fn parse_enum_values(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let values = self.parse_comma_separated(|parser| { @@ -10751,7 +11925,7 @@ impl<'a> Parser<'a> { if trailing_bracket.0 { return parser_err!( format!("unmatched > after parsing data type {ty}"), - self.peek_token() + self.peek_token_ref() ); } @@ -11063,7 +12237,7 @@ impl<'a> Parser<'a> { Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))), Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), Keyword::ARRAY => { - if dialect_of!(self is SnowflakeDialect) { + if self.dialect.supports_array_typedef_without_element_type() { Ok(DataType::Array(ArrayElemTypeDef::None)) } else if dialect_of!(self is ClickHouseDialect) { Ok(self.parse_sub_type(|internal_type| { @@ -11083,7 +12257,8 @@ impl<'a> Parser<'a> { let field_defs = self.parse_duckdb_struct_type_def()?; Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses)) } - Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | GenericDialect) => { + Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | DatabricksDialect | GenericDialect) => + { self.prev_token(); let (field_defs, _trailing_bracket) = self.parse_struct_type_def(Self::parse_struct_field_def)?; @@ -11131,7 +12306,7 @@ impl<'a> Parser<'a> { Keyword::TABLE => { // an LParen after the TABLE keyword indicates that table columns are being defined // whereas no LParen indicates an anonymous table expression will be returned - if self.peek_token() == Token::LParen { + if self.peek_token_ref().token == Token::LParen { let columns = self.parse_returns_table_columns()?; Ok(DataType::Table(Some(columns))) } else { @@ -11193,6 +12368,7 @@ impl<'a> Parser<'a> { Ok(columns) } + /// Parse a parenthesized, comma-separated list of single-quoted strings. 
pub fn parse_string_values(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let mut values = Vec::new(); @@ -11301,9 +12477,9 @@ impl<'a> Parser<'a> { Keyword::IGNORE => TableIndexHintType::Ignore, Keyword::FORCE => TableIndexHintType::Force, _ => { - return self.expected( + return self.expected_ref( "expected to match USE/IGNORE/FORCE keyword", - self.peek_token(), + self.peek_token_ref(), ) } }; @@ -11311,7 +12487,8 @@ impl<'a> Parser<'a> { Some(Keyword::INDEX) => TableIndexType::Index, Some(Keyword::KEY) => TableIndexType::Key, _ => { - return self.expected("expected to match INDEX/KEY keyword", self.peek_token()) + return self + .expected_ref("expected to match INDEX/KEY keyword", self.peek_token_ref()) } }; let for_clause = if self.parse_keyword(Keyword::FOR) { @@ -11322,9 +12499,9 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { TableIndexHintForClause::GroupBy } else { - return self.expected( + return self.expected_ref( "expected to match FOR/ORDER BY/GROUP BY table hint in for clause", - self.peek_token(), + self.peek_token_ref(), ); }; Some(clause) @@ -11333,7 +12510,7 @@ impl<'a> Parser<'a> { }; self.expect_token(&Token::LParen)?; - let index_names = if self.peek_token().token != Token::RParen { + let index_names = if self.peek_token_ref().token != Token::RParen { self.parse_comma_separated(Parser::parse_identifier)? } else { vec![] @@ -11380,16 +12557,17 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - // By default, if a word is located after the `AS` keyword we consider it an alias - // as long as it's not reserved. + // Accepts a keyword as an alias if the AS keyword explicitly indicate an alias or if the + // caller provided a list of reserved keywords and the keyword is not on that list. 
Token::Word(w) - if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) => + if reserved_kwds.is_some() + && (after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword))) => { Ok(Some(w.into_ident(next_token.span))) } - // This pattern allows for customizing the acceptance of words as aliases based on the caller's - // context, such as to what SQL element this word is a potential alias of (select item alias, table name - // alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords. + // Accepts a keyword as alias based on the caller's context, such as to what SQL element + // this word is a potential alias of using the validator call-back. This allows for + // dialect-specific logic. Token::Word(w) if validator(after_as, &w.keyword, self) => { Ok(Some(w.into_ident(next_token.span))) } @@ -11406,6 +12584,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional `GROUP BY` clause, returning `Some(GroupByExpr)` when present. pub fn parse_optional_group_by(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { let expressions = if self.parse_keyword(Keyword::ALL) { @@ -11432,7 +12611,7 @@ impl<'a> Parser<'a> { _ => { return parser_err!( "BUG: expected to match GroupBy modifier keyword", - self.peek_token().span.start + self.peek_token_ref().span.start ) } }); @@ -11462,6 +12641,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional `ORDER BY` clause, returning `Some(OrderBy)` when present. pub fn parse_optional_order_by(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { let order_by = @@ -11473,7 +12653,7 @@ impl<'a> Parser<'a> { } } else { let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; - let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { + let interpolate = if self.dialect.supports_interpolate() { self.parse_interpolations()? 
} else { None @@ -11515,9 +12695,7 @@ impl<'a> Parser<'a> { })); } - let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::BY) - { + let limit_by = if self.dialect.supports_limit_by() && self.parse_keyword(Keyword::BY) { Some(self.parse_comma_separated(Parser::parse_expr)?) } else { None @@ -11550,6 +12728,9 @@ impl<'a> Parser<'a> { let fn_name = self.parse_object_name(false)?; self.parse_function_call(fn_name) .map(TableObject::TableFunction) + } else if self.dialect.supports_insert_table_query() && self.peek_subquery_or_cte_start() { + self.parse_parenthesized(|p| p.parse_query()) + .map(TableObject::TableQuery) } else { self.parse_object_name(false).map(TableObject::TableName) } @@ -11590,7 +12771,7 @@ impl<'a> Parser<'a> { } } else { loop { - if allow_wildcards && self.peek_token().token == Token::Mul { + if allow_wildcards && self.peek_token_ref().token == Token::Mul { let span = self.next_token().span; parts.push(ObjectNamePart::Identifier(Ident { value: Token::Mul.to_string(), @@ -11605,7 +12786,7 @@ impl<'a> Parser<'a> { } } else if self.dialect.supports_object_name_double_dot_notation() && parts.len() == 1 - && matches!(self.peek_token().token, Token::Period) + && matches!(self.peek_token_ref().token, Token::Period) { // Empty string here means default schema parts.push(ObjectNamePart::Identifier(Ident::new(""))); @@ -11666,11 +12847,14 @@ impl<'a> Parser<'a> { pub fn parse_identifiers(&mut self) -> Result, ParserError> { let mut idents = vec![]; loop { - match &self.peek_token_ref().token { + let token = self.peek_token_ref(); + match &token.token { Token::Word(w) => { - idents.push(w.clone().into_ident(self.peek_token_ref().span)); + idents.push(w.to_ident(token.span)); + } + Token::EOF | Token::Eq | Token::SemiColon | Token::VerticalBarRightAngleBracket => { + break } - Token::EOF | Token::Eq => break, _ => {} } self.advance_token(); @@ -11760,7 +12944,7 @@ impl<'a> Parser<'a> { token => { return 
Err(ParserError::ParserError(format!( "Unexpected token in identifier: {token}" - )))? + )))?; } } } @@ -11832,7 +13016,7 @@ impl<'a> Parser<'a> { } // If next token is period, then it is part of an ObjectName and we don't expect whitespace // after the number. - !matches!(self.peek_token().token, Token::Period) + !matches!(self.peek_token_ref().token, Token::Period) } _ => { return self @@ -11860,7 +13044,7 @@ impl<'a> Parser<'a> { /// Parses a parenthesized, comma-separated list of column definitions within a view. fn parse_view_columns(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { - if self.peek_token().token == Token::RParen { + if self.peek_token_ref().token == Token::RParen { self.next_token(); Ok(vec![]) } else { @@ -11922,6 +13106,7 @@ impl<'a> Parser<'a> { self.parse_parenthesized_column_list_inner(optional, allow_empty, |p| p.parse_identifier()) } + /// Parse a parenthesized list of compound identifiers as expressions. pub fn parse_parenthesized_compound_identifier_list( &mut self, optional: IsOptional, @@ -11966,7 +13151,7 @@ impl<'a> Parser<'a> { F: FnMut(&mut Parser) -> Result, { if self.consume_token(&Token::LParen) { - if allow_empty && self.peek_token().token == Token::RParen { + if allow_empty && self.peek_token_ref().token == Token::RParen { self.next_token(); Ok(vec![]) } else { @@ -11977,7 +13162,7 @@ impl<'a> Parser<'a> { } else if optional == Optional { Ok(vec![]) } else { - self.expected("a list of columns in parentheses", self.peek_token()) + self.expected_ref("a list of columns in parentheses", self.peek_token_ref()) } } @@ -11996,6 +13181,7 @@ impl<'a> Parser<'a> { } } + /// Parse an unsigned precision value enclosed in parentheses, e.g. `(10)`. pub fn parse_precision(&mut self) -> Result { self.expect_token(&Token::LParen)?; let n = self.parse_literal_uint()?; @@ -12003,6 +13189,7 @@ impl<'a> Parser<'a> { Ok(n) } + /// Parse an optional precision `(n)` and return it as `Some(n)` when present. 
pub fn parse_optional_precision(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { let n = self.parse_literal_uint()?; @@ -12048,7 +13235,7 @@ impl<'a> Parser<'a> { Keyword::SECOND => Ok(Some(IntervalFields::DayToSecond)), _ => { self.prev_token(); - self.expected("HOUR, MINUTE, or SECOND", self.peek_token()) + self.expected_ref("HOUR, MINUTE, or SECOND", self.peek_token_ref()) } } } else { @@ -12063,7 +13250,7 @@ impl<'a> Parser<'a> { Keyword::SECOND => Ok(Some(IntervalFields::HourToSecond)), _ => { self.prev_token(); - self.expected("MINUTE or SECOND", self.peek_token()) + self.expected_ref("MINUTE or SECOND", self.peek_token_ref()) } } } else { @@ -12083,9 +13270,9 @@ impl<'a> Parser<'a> { Some(Keyword::SECOND) => Ok(Some(IntervalFields::Second)), Some(_) => { self.prev_token(); - self.expected( + self.expected_ref( "YEAR, MONTH, DAY, HOUR, MINUTE, or SECOND", - self.peek_token(), + self.peek_token_ref(), ) } None => Ok(None), @@ -12112,6 +13299,7 @@ impl<'a> Parser<'a> { Ok((precision, time_zone)) } + /// Parse an optional character length specification `(n | MAX [CHARACTERS|OCTETS])`. pub fn parse_optional_character_length( &mut self, ) -> Result, ParserError> { @@ -12124,6 +13312,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional binary length specification like `(n)`. pub fn parse_optional_binary_length(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { let binary_length = self.parse_binary_length()?; @@ -12134,6 +13323,7 @@ impl<'a> Parser<'a> { } } + /// Parse a character length, handling `MAX` or integer lengths with optional units. pub fn parse_character_length(&mut self) -> Result { if self.parse_keyword(Keyword::MAX) { return Ok(CharacterLength::Max); @@ -12149,6 +13339,7 @@ impl<'a> Parser<'a> { Ok(CharacterLength::IntegerLength { length, unit }) } + /// Parse a binary length specification, returning `BinaryLength`. 
pub fn parse_binary_length(&mut self) -> Result { if self.parse_keyword(Keyword::MAX) { return Ok(BinaryLength::Max); @@ -12157,6 +13348,7 @@ impl<'a> Parser<'a> { Ok(BinaryLength::IntegerLength { length }) } + /// Parse an optional `(precision[, scale])` and return `(Option, Option)`. pub fn parse_optional_precision_scale( &mut self, ) -> Result<(Option, Option), ParserError> { @@ -12174,6 +13366,7 @@ impl<'a> Parser<'a> { } } + /// Parse exact-number precision/scale info like `(precision[, scale])` for decimal types. pub fn parse_exact_number_optional_precision_scale( &mut self, ) -> Result { @@ -12217,6 +13410,7 @@ impl<'a> Parser<'a> { } } + /// Parse optional type modifiers appearing in parentheses e.g. `(UNSIGNED, ZEROFILL)`. pub fn parse_optional_type_modifiers(&mut self) -> Result>, ParserError> { if self.consume_token(&Token::LParen) { let mut modifiers = Vec::new(); @@ -12264,17 +13458,9 @@ impl<'a> Parser<'a> { Ok(Box::new(SetExpr::Delete(self.parse_delete(delete_token)?))) } - /// Parse a MERGE statement, returning a `Box`ed SetExpr - /// - /// This is used to reduce the size of the stack frames in debug builds - fn parse_merge_setexpr_boxed( - &mut self, - merge_token: TokenWithSpan, - ) -> Result, ParserError> { - Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) - } - + /// Parse a `DELETE` statement and return `Statement::Delete`. pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result { + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. 
// https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement @@ -12290,6 +13476,9 @@ impl<'a> Parser<'a> { }; let from = self.parse_comma_separated(Parser::parse_table_and_joins)?; + + let output = self.maybe_parse_output_clause()?; + let using = if self.parse_keyword(Keyword::USING) { Some(self.parse_comma_separated(Parser::parse_table_and_joins)?) } else { @@ -12318,6 +13507,7 @@ impl<'a> Parser<'a> { Ok(Statement::Delete(Delete { delete_token: delete_token.into(), + optimizer_hints, tables, from: if with_from_keyword { FromTable::WithFromKeyword(from) @@ -12327,12 +13517,14 @@ impl<'a> Parser<'a> { using, selection, returning, + output, order_by, limit, })) } - // KILL [CONNECTION | QUERY | MUTATION] processlist_id + /// Parse a `KILL` statement, optionally specifying `CONNECTION`, `QUERY`, or `MUTATION`. + /// KILL [CONNECTION | QUERY | MUTATION] processlist_id pub fn parse_kill(&mut self) -> Result { let modifier_keyword = self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]); @@ -12346,9 +13538,9 @@ impl<'a> Parser<'a> { if dialect_of!(self is ClickHouseDialect | GenericDialect) { Some(KillType::Mutation) } else { - self.expected( + self.expected_ref( "Unsupported type for KILL, allowed: CONNECTION | QUERY", - self.peek_token(), + self.peek_token_ref(), )? } } @@ -12358,6 +13550,7 @@ impl<'a> Parser<'a> { Ok(Statement::Kill { modifier, id }) } + /// Parse an `EXPLAIN` statement, handling dialect-specific options and modifiers. pub fn parse_explain( &mut self, describe_alias: DescribeAlias, @@ -12373,7 +13566,7 @@ impl<'a> Parser<'a> { // although not all features may be implemented. if describe_alias == DescribeAlias::Explain && self.dialect.supports_explain_with_utility_options() - && self.peek_token().token == Token::LParen + && self.peek_token_ref().token == Token::LParen { options = Some(self.parse_utility_options()?) 
} else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { @@ -12432,6 +13625,7 @@ impl<'a> Parser<'a> { /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't /// expect the initial keyword to be already consumed + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] pub fn parse_query(&mut self) -> Result, ParserError> { let _guard = self.recursion_counter.try_decrease()?; let with = if self.parse_keyword(Keyword::WITH) { @@ -12525,18 +13719,17 @@ impl<'a> Parser<'a> { locks.push(self.parse_lock()?); } } - let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::FORMAT) - { - if self.parse_keyword(Keyword::NULL) { - Some(FormatClause::Null) + let format_clause = + if self.dialect.supports_select_format() && self.parse_keyword(Keyword::FORMAT) { + if self.parse_keyword(Keyword::NULL) { + Some(FormatClause::Null) + } else { + let ident = self.parse_identifier()?; + Some(FormatClause::Identifier(ident)) + } } else { - let ident = self.parse_identifier()?; - Some(FormatClause::Identifier(ident)) - } - } else { - None - }; + None + }; let pipe_operators = if self.dialect.supports_pipe_operator() { self.parse_pipe_operators()? 
@@ -12699,7 +13892,7 @@ impl<'a> Parser<'a> { Keyword::PIVOT => { self.expect_token(&Token::LParen)?; let aggregate_functions = - self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.parse_comma_separated(Self::parse_pivot_aggregate_function)?; self.expect_keyword_is(Keyword::FOR)?; let value_column = self.parse_period_separated(|p| p.parse_identifier())?; self.expect_keyword_is(Keyword::IN)?; @@ -12780,8 +13973,7 @@ impl<'a> Parser<'a> { } fn parse_settings(&mut self) -> Result>, ParserError> { - let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::SETTINGS) + let settings = if self.dialect.supports_settings() && self.parse_keyword(Keyword::SETTINGS) { let key_values = self.parse_comma_separated(|p| { let key = p.parse_identifier()?; @@ -12813,7 +14005,7 @@ impl<'a> Parser<'a> { pub fn parse_for_xml(&mut self) -> Result { let for_xml = if self.parse_keyword(Keyword::RAW) { let mut element_name = None; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { self.expect_token(&Token::LParen)?; element_name = Some(self.parse_literal_string()?); self.expect_token(&Token::RParen)?; @@ -12825,7 +14017,7 @@ impl<'a> Parser<'a> { ForXml::Explicit } else if self.parse_keyword(Keyword::PATH) { let mut element_name = None; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { self.expect_token(&Token::LParen)?; element_name = Some(self.parse_literal_string()?); self.expect_token(&Token::RParen)?; @@ -12840,7 +14032,7 @@ impl<'a> Parser<'a> { let mut binary_base64 = false; let mut root = None; let mut r#type = false; - while self.peek_token().token == Token::Comma { + while self.peek_token_ref().token == Token::Comma { self.next_token(); if self.parse_keyword(Keyword::ELEMENTS) { elements = true; @@ -12878,7 +14070,7 @@ impl<'a> Parser<'a> { let mut root = None; let mut include_null_values = false; let mut 
without_array_wrapper = false; - while self.peek_token().token == Token::Comma { + while self.peek_token_ref().token == Token::Comma { self.next_token(); if self.parse_keyword(Keyword::ROOT) { self.expect_token(&Token::LParen)?; @@ -12992,9 +14184,9 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::TABLE) { SetExpr::Table(Box::new(self.parse_as_table()?)) } else { - return self.expected( + return self.expected_ref( "SELECT, VALUES, or a subquery in the query body", - self.peek_token(), + self.peek_token_ref(), ); }; @@ -13038,6 +14230,7 @@ impl<'a> Parser<'a> { Ok(expr.into()) } + /// Parse a set operator token into its `SetOperator` variant. pub fn parse_set_operator(&mut self, token: &Token) -> Option { match token { Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), @@ -13048,6 +14241,7 @@ impl<'a> Parser<'a> { } } + /// Parse a set quantifier (e.g., `ALL`, `DISTINCT BY NAME`) for the given set operator. pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { Some( @@ -13086,7 +14280,9 @@ impl<'a> Parser<'a> { if !self.peek_keyword(Keyword::SELECT) { return Ok(Select { select_token: AttachedToken(from_token), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![], @@ -13105,7 +14301,7 @@ impl<'a> Parser<'a> { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::FromFirstNoSelect, }); } @@ -13113,15 +14309,29 @@ impl<'a> Parser<'a> { } let select_token = self.expect_keyword(Keyword::SELECT)?; + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let value_table_mode = self.parse_value_table_mode()?; + let (select_modifiers, distinct_select_modifier) = + if self.dialect.supports_select_modifiers() { + self.parse_select_modifiers()? 
+ } else { + (None, None) + }; + let mut top_before_distinct = false; let mut top = None; if self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { top = Some(self.parse_top()?); top_before_distinct = true; } - let distinct = self.parse_all_or_distinct()?; + + let distinct = if distinct_select_modifier.is_some() { + distinct_select_modifier + } else { + self.parse_all_or_distinct()? + }; + if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { top = Some(self.parse_top()?); } @@ -13189,8 +14399,7 @@ impl<'a> Parser<'a> { } } - let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::PREWHERE) + let prewhere = if self.dialect.supports_prewhere() && self.parse_keyword(Keyword::PREWHERE) { Some(self.parse_expr()?) } else { @@ -13203,6 +14412,8 @@ impl<'a> Parser<'a> { None }; + let connect_by = self.maybe_parse_connect_by()?; + let group_by = self .parse_optional_group_by()? .unwrap_or_else(|| GroupByExpr::Expressions(vec![], vec![])); @@ -13255,20 +14466,11 @@ impl<'a> Parser<'a> { Default::default() }; - let connect_by = if self.dialect.supports_connect_by() - && self - .parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT]) - .is_some() - { - self.prev_token(); - Some(self.parse_connect_by()?) - } else { - None - }; - Ok(Select { select_token: AttachedToken(select_token), + optimizer_hints, distinct, + select_modifiers, top, top_before_distinct, projection, @@ -13296,25 +14498,148 @@ impl<'a> Parser<'a> { }) } - fn parse_value_table_mode(&mut self) -> Result, ParserError> { - if !dialect_of!(self is BigQueryDialect) { - return Ok(None); + /// Parses optimizer hints at the current token position. + /// + /// Collects all `/*prefix+...*/` and `--prefix+...` patterns. + /// The `prefix` is any run of ASCII alphanumeric characters between the + /// comment marker and `+` (e.g. `""` for `/*+...*/`, `"abc"` for `/*abc+...*/`). 
+ /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html#optimizer-hints-overview) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + fn maybe_parse_optimizer_hints(&mut self) -> Result, ParserError> { + let supports_hints = self.dialect.supports_comment_optimizer_hint(); + if !supports_hints { + return Ok(vec![]); + } + let mut hints = vec![]; + loop { + let t = self.peek_nth_token_no_skip_ref(0); + let Token::Whitespace(ws) = &t.token else { + break; + }; + match ws { + Whitespace::SingleLineComment { comment, prefix } => { + if let Some((hint_prefix, text)) = Self::extract_hint_prefix_and_text(comment) { + hints.push(OptimizerHint { + prefix: hint_prefix, + text, + style: OptimizerHintStyle::SingleLine { + prefix: prefix.clone(), + }, + }); + } + self.next_token_no_skip(); + } + Whitespace::MultiLineComment(comment) => { + if let Some((hint_prefix, text)) = Self::extract_hint_prefix_and_text(comment) { + hints.push(OptimizerHint { + prefix: hint_prefix, + text, + style: OptimizerHintStyle::MultiLine, + }); + } + self.next_token_no_skip(); + } + Whitespace::Space | Whitespace::Tab | Whitespace::Newline => { + self.next_token_no_skip(); + } + } } + Ok(hints) + } - let mode = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::VALUE]) { - Some(ValueTableMode::DistinctAsValue) - } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::STRUCT]) { - Some(ValueTableMode::DistinctAsStruct) - } else if self.parse_keywords(&[Keyword::AS, Keyword::VALUE]) - || self.parse_keywords(&[Keyword::ALL, Keyword::AS, Keyword::VALUE]) - { - Some(ValueTableMode::AsValue) + /// Checks if a comment's content starts with `[ASCII-alphanumeric]*+` + /// and returns `(prefix, text_after_plus)` if so. 
+ fn extract_hint_prefix_and_text(comment: &str) -> Option<(String, String)> { + let (before_plus, text) = comment.split_once('+')?; + if before_plus.chars().all(|c| c.is_ascii_alphanumeric()) { + Some((before_plus.to_string(), text.to_string())) + } else { + None + } + } + + /// Parses MySQL SELECT modifiers and DISTINCT/ALL in any order. + /// + /// Manual testing shows odifiers can appear in any order, and modifiers other than DISTINCT/ALL + /// can be repeated. + /// + /// + fn parse_select_modifiers( + &mut self, + ) -> Result<(Option, Option), ParserError> { + let mut modifiers = SelectModifiers::default(); + let mut distinct = None; + + let keywords = &[ + Keyword::ALL, + Keyword::DISTINCT, + Keyword::DISTINCTROW, + Keyword::HIGH_PRIORITY, + Keyword::STRAIGHT_JOIN, + Keyword::SQL_SMALL_RESULT, + Keyword::SQL_BIG_RESULT, + Keyword::SQL_BUFFER_RESULT, + Keyword::SQL_NO_CACHE, + Keyword::SQL_CALC_FOUND_ROWS, + ]; + + while let Some(keyword) = self.parse_one_of_keywords(keywords) { + match keyword { + Keyword::ALL | Keyword::DISTINCT if distinct.is_none() => { + self.prev_token(); + distinct = self.parse_all_or_distinct()?; + } + // DISTINCTROW is a MySQL-specific legacy (but not deprecated) alias for DISTINCT + Keyword::DISTINCTROW if distinct.is_none() => { + distinct = Some(Distinct::Distinct); + } + Keyword::HIGH_PRIORITY => modifiers.high_priority = true, + Keyword::STRAIGHT_JOIN => modifiers.straight_join = true, + Keyword::SQL_SMALL_RESULT => modifiers.sql_small_result = true, + Keyword::SQL_BIG_RESULT => modifiers.sql_big_result = true, + Keyword::SQL_BUFFER_RESULT => modifiers.sql_buffer_result = true, + Keyword::SQL_NO_CACHE => modifiers.sql_no_cache = true, + Keyword::SQL_CALC_FOUND_ROWS => modifiers.sql_calc_found_rows = true, + _ => { + self.prev_token(); + return self.expected_ref( + "HIGH_PRIORITY, STRAIGHT_JOIN, or other MySQL select modifier", + self.peek_token_ref(), + ); + } + } + } + + // Avoid polluting the AST with 
`Some(SelectModifiers::default())` empty value unless there + // actually were some modifiers set. + let select_modifiers = if modifiers.is_any_set() { + Some(modifiers) + } else { + None + }; + Ok((select_modifiers, distinct)) + } + + fn parse_value_table_mode(&mut self) -> Result, ParserError> { + if !dialect_of!(self is BigQueryDialect) { + return Ok(None); + } + + let mode = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::VALUE]) { + Some(ValueTableMode::DistinctAsValue) + } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::STRUCT]) { + Some(ValueTableMode::DistinctAsStruct) + } else if self.parse_keywords(&[Keyword::AS, Keyword::VALUE]) + || self.parse_keywords(&[Keyword::ALL, Keyword::AS, Keyword::VALUE]) + { + Some(ValueTableMode::AsValue) } else if self.parse_keywords(&[Keyword::AS, Keyword::STRUCT]) || self.parse_keywords(&[Keyword::ALL, Keyword::AS, Keyword::STRUCT]) { Some(ValueTableMode::AsStruct) } else if self.parse_keyword(Keyword::AS) { - self.expected("VALUE or STRUCT", self.peek_token())? + self.expected_ref("VALUE or STRUCT", self.peek_token_ref())? 
} else { None }; @@ -13336,27 +14661,29 @@ impl<'a> Parser<'a> { res } - pub fn parse_connect_by(&mut self) -> Result { - let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) { - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - (condition, relationships) - } else { - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?; - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - (condition, relationships) - }; - Ok(ConnectBy { - condition, - relationships, - }) + /// Parse a `CONNECT BY` clause (Oracle-style hierarchical query support). + pub fn maybe_parse_connect_by(&mut self) -> Result, ParserError> { + let mut clauses = Vec::with_capacity(2); + loop { + if let Some(idx) = self.parse_keywords_indexed(&[Keyword::START, Keyword::WITH]) { + clauses.push(ConnectByKind::StartWith { + start_token: self.token_at(idx).clone().into(), + condition: self.parse_expr()?.into(), + }); + } else if let Some(idx) = self.parse_keywords_indexed(&[Keyword::CONNECT, Keyword::BY]) + { + clauses.push(ConnectByKind::ConnectBy { + connect_token: self.token_at(idx).clone().into(), + nocycle: self.parse_keyword(Keyword::NOCYCLE), + relationships: self.with_state(ParserState::ConnectBy, |parser| { + parser.parse_comma_separated(Parser::parse_expr) + })?, + }); + } else { + break; + } + } + Ok(clauses) } /// Parse `CREATE TABLE x AS TABLE y` @@ -13438,7 +14765,7 @@ impl<'a> Parser<'a> { } else if let Ok(expr) = self.parse_expr() { expr } else { - self.expected("variable value", self.peek_token())? + self.expected_ref("variable value", self.peek_token_ref())? 
}; values.push(value); @@ -13470,13 +14797,13 @@ impl<'a> Parser<'a> { // Parenthesized assignments are handled in the `parse_set` function after // trying to parse list of assignments using this function. // If a dialect supports both, and we find a LParen, we early exit from this function. - self.expected("Unparenthesized assignment", self.peek_token())? + self.expected_ref("Unparenthesized assignment", self.peek_token_ref())? } else { self.parse_object_name(false)? }; if !(self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO)) { - return self.expected("assignment operator", self.peek_token()); + return self.expected_ref("assignment operator", self.peek_token_ref()); } let value = self.parse_expr()?; @@ -13550,7 +14877,7 @@ impl<'a> Parser<'a> { .into()); } else if self.parse_keyword(Keyword::TRANSACTION) { if self.parse_keyword(Keyword::SNAPSHOT) { - let snapshot_id = self.parse_value()?.value; + let snapshot_id = self.parse_value()?; return Ok(Set::SetTransaction { modes: vec![], snapshot: Some(snapshot_id), @@ -13565,6 +14892,15 @@ impl<'a> Parser<'a> { } .into()); } else if self.parse_keyword(Keyword::AUTHORIZATION) { + let scope = match scope { + Some(s) => s, + None => { + return self.expected_at( + "SESSION, LOCAL, or other scope modifier before AUTHORIZATION", + self.get_current_index(), + ) + } + }; let auth_value = if self.parse_keyword(Keyword::DEFAULT) { SetSessionAuthorizationParamKind::Default } else { @@ -13572,7 +14908,7 @@ impl<'a> Parser<'a> { SetSessionAuthorizationParamKind::User(value) }; return Ok(Set::SetSessionAuthorization(SetSessionAuthorizationParam { - scope: scope.expect("SET ... 
AUTHORIZATION must have a scope"), + scope, kind: auth_value, }) .into()); @@ -13642,9 +14978,10 @@ impl<'a> Parser<'a> { return self.parse_set_session_params(); }; - self.expected("equals sign or TO", self.peek_token()) + self.expected_ref("equals sign or TO", self.peek_token_ref()) } + /// Parse session parameter assignments after `SET` when no `=` or `TO` is present. pub fn parse_set_session_params(&mut self) -> Result { if self.parse_keyword(Keyword::STATISTICS) { let topic = match self.parse_one_of_keywords(&[ @@ -13657,7 +14994,7 @@ impl<'a> Parser<'a> { Some(Keyword::PROFILE) => SessionParamStatsTopic::Profile, Some(Keyword::TIME) => SessionParamStatsTopic::Time, Some(Keyword::XML) => SessionParamStatsTopic::Xml, - _ => return self.expected("IO, PROFILE, TIME or XML", self.peek_token()), + _ => return self.expected_ref("IO, PROFILE, TIME or XML", self.peek_token_ref()), }; let value = self.parse_session_param_value()?; Ok( @@ -13715,10 +15052,11 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::OFF) { Ok(SessionParamValue::Off) } else { - self.expected("ON or OFF", self.peek_token()) + self.expected_ref("ON or OFF", self.peek_token_ref()) } } + /// Parse a `SHOW` statement and dispatch to specific SHOW handlers. pub fn parse_show(&mut self) -> Result { let terse = self.parse_keyword(Keyword::TERSE); let extended = self.parse_keyword(Keyword::EXTENDED); @@ -13806,6 +15144,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `SHOW CREATE ` returning the corresponding `ShowCreate` statement. pub fn parse_show_create(&mut self) -> Result { let obj_type = match self.expect_one_of_keywords(&[ Keyword::TABLE, @@ -13831,6 +15170,7 @@ impl<'a> Parser<'a> { Ok(Statement::ShowCreate { obj_type, obj_name }) } + /// Parse `SHOW COLUMNS`/`SHOW FIELDS` and return a `ShowColumns` statement. pub fn parse_show_columns( &mut self, extended: bool, @@ -13876,16 +15216,19 @@ impl<'a> Parser<'a> { }) } + /// Parse `SHOW FUNCTIONS` and optional filter. 
pub fn parse_show_functions(&mut self) -> Result { let filter = self.parse_show_statement_filter()?; Ok(Statement::ShowFunctions { filter }) } + /// Parse `SHOW COLLATION` and optional filter. pub fn parse_show_collation(&mut self) -> Result { let filter = self.parse_show_statement_filter()?; Ok(Statement::ShowCollation { filter }) } + /// Parse an optional filter used by `SHOW` statements (LIKE, ILIKE, WHERE, or literal). pub fn parse_show_statement_filter( &mut self, ) -> Result, ParserError> { @@ -13909,6 +15252,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `USE` statement (database/catalog/schema/warehouse/role selection). pub fn parse_use(&mut self) -> Result { // Determine which keywords are recognized by the current dialect let parsed_keyword = if dialect_of!(self is HiveDialect) { @@ -13960,6 +15304,7 @@ impl<'a> Parser<'a> { } } + /// Parse a table factor followed by any join clauses, returning `TableWithJoins`. pub fn parse_table_and_joins(&mut self) -> Result { let relation = self.parse_table_factor()?; // Note that for keywords to be properly handled here, they need to be @@ -13980,7 +15325,7 @@ impl<'a> Parser<'a> { // MSSQL extension, similar to CROSS JOIN LATERAL JoinOperator::CrossApply } else { - return self.expected("JOIN or APPLY after CROSS", self.peek_token()); + return self.expected_ref("JOIN or APPLY after CROSS", self.peek_token_ref()); }; let relation = self.parse_table_factor()?; let join_operator = if matches!(join_operator, JoinOperator::CrossJoin(_)) @@ -14019,7 +15364,7 @@ impl<'a> Parser<'a> { } } else { let natural = self.parse_keyword(Keyword::NATURAL); - let peek_keyword = if let Token::Word(w) = self.peek_token().token { + let peek_keyword = if let Token::Word(w) = &self.peek_token_ref().token { w.keyword } else { Keyword::NoKeyword @@ -14100,14 +15445,15 @@ impl<'a> Parser<'a> { JoinOperator::FullOuter } Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); + return self.expected_ref("LEFT, RIGHT, 
or FULL", self.peek_token_ref()); } Keyword::STRAIGHT_JOIN => { let _ = self.next_token(); // consume STRAIGHT_JOIN JoinOperator::StraightJoin } _ if natural => { - return self.expected("a join type after NATURAL", self.peek_token()); + return self + .expected_ref("a join type after NATURAL", self.peek_token_ref()); } _ => break, }; @@ -14152,7 +15498,9 @@ impl<'a> Parser<'a> { } /// A table name or a parenthesized subquery, followed by optional `[AS] alias` + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] pub fn parse_table_factor(&mut self) -> Result { + let _guard = self.recursion_counter.try_decrease()?; if self.parse_keyword(Keyword::LATERAL) { // LATERAL must always be followed by a subquery or table function. if self.consume_token(&Token::LParen) { @@ -14206,7 +15554,9 @@ impl<'a> Parser<'a> { table = match kw { Keyword::PIVOT => self.parse_pivot_table_factor(table)?, Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in pivot/unpivot"), + )), } } return Ok(table); @@ -14241,7 +15591,7 @@ impl<'a> Parser<'a> { table_with_joins: Box::new(table_and_joins), alias, }) - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { + } else if self.dialect.supports_parens_around_table_factor() { // Dialect-specific behavior: Snowflake diverges from the // standard and from most of the other implementations by // allowing extra parentheses not only around a join (B), but @@ -14286,9 +15636,9 @@ impl<'a> Parser<'a> { } else { // The SQL spec prohibits derived tables and bare tables from // appearing alone in parentheses (e.g. 
`FROM (mytable)`) - self.expected("joined table", self.peek_token()) + self.expected_ref("joined table", self.peek_token_ref()) } - } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) + } else if self.dialect.supports_values_as_table_factor() && matches!( self.peek_tokens(), [ @@ -14322,6 +15672,7 @@ impl<'a> Parser<'a> { pipe_operators: vec![], }), alias, + sample: None, }) } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.parse_keyword(Keyword::UNNEST) @@ -14362,7 +15713,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { let json_expr = self.parse_expr()?; self.expect_token(&Token::Comma)?; - let json_path = self.parse_value()?.value; + let json_path = self.parse_value()?; self.expect_keyword_is(Keyword::COLUMNS)?; self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; @@ -14385,10 +15736,13 @@ impl<'a> Parser<'a> { && self.peek_keyword_with_tokens(Keyword::SEMANTIC_VIEW, &[Token::LParen]) { self.parse_semantic_view_table_factor() + } else if self.peek_token_ref().token == Token::AtSign { + // Stage reference: @mystage or @namespace.stage (e.g. 
Snowflake) + self.parse_snowflake_stage_table_factor() } else { let name = self.parse_object_name(true)?; - let json_path = match self.peek_token().token { + let json_path = match &self.peek_token_ref().token { Token::LBracket if self.dialect.supports_partiql() => Some(self.parse_json_path()?), _ => None, }; @@ -14465,7 +15819,9 @@ impl<'a> Parser<'a> { table = match kw { Keyword::PIVOT => self.parse_pivot_table_factor(table)?, Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in pivot/unpivot"), + )), } } @@ -14479,6 +15835,37 @@ impl<'a> Parser<'a> { } } + /// Parse a Snowflake stage reference as a table factor. + /// Handles syntax like: `@mystage1 (file_format => 'myformat', pattern => '...')` + /// + /// See: + fn parse_snowflake_stage_table_factor(&mut self) -> Result { + // Parse the stage name starting with @ + let name = crate::dialect::parse_snowflake_stage_name(self)?; + + // Parse optional stage options like (file_format => 'myformat', pattern => '...') + let args = if self.consume_token(&Token::LParen) { + Some(self.parse_table_function_args()?) 
+ } else { + None + }; + + let alias = self.maybe_parse_table_alias()?; + + Ok(TableFactor::Table { + name, + alias, + args, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }) + } + fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { let modifier = if self.parse_keyword(Keyword::TABLESAMPLE) { TableSampleModifier::TableSample @@ -14510,9 +15897,9 @@ impl<'a> Parser<'a> { let parenthesized = self.consume_token(&Token::LParen); let (quantity, bucket) = if parenthesized && self.parse_keyword(Keyword::BUCKET) { - let selected_bucket = self.parse_number_value()?.value; + let selected_bucket = self.parse_number_value()?; self.expect_keywords(&[Keyword::OUT, Keyword::OF])?; - let total = self.parse_number_value()?.value; + let total = self.parse_number_value()?; let on = if self.parse_keyword(Keyword::ON) { Some(self.parse_expr()?) } else { @@ -14536,7 +15923,7 @@ impl<'a> Parser<'a> { } else { return parser_err!( "Expecting number or byte length e.g. 100M", - self.peek_token().span.start + self.peek_token_ref().span.start ); } } @@ -14590,7 +15977,7 @@ impl<'a> Parser<'a> { modifier: TableSampleSeedModifier, ) -> Result { self.expect_token(&Token::LParen)?; - let value = self.parse_number_value()?.value; + let value = self.parse_number_value()?; self.expect_token(&Token::RParen)?; Ok(TableSampleSeed { modifier, value }) } @@ -14601,7 +15988,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let json_expr = self.parse_expr()?; let json_path = if self.consume_token(&Token::Comma) { - Some(self.parse_value()?.value) + Some(self.parse_value()?) 
} else { None }; @@ -14729,7 +16116,7 @@ impl<'a> Parser<'a> { let mut facts = Vec::new(); let mut where_clause = None; - while self.peek_token().token != Token::RParen { + while self.peek_token_ref().token != Token::RParen { if self.parse_keyword(Keyword::DIMENSIONS) { if !dimensions.is_empty() { return Err(ParserError::ParserError( @@ -14759,12 +16146,13 @@ impl<'a> Parser<'a> { } where_clause = Some(self.parse_expr()?); } else { + let tok = self.peek_token_ref(); return parser_err!( format!( "Expected one of DIMENSIONS, METRICS, FACTS or WHERE, got {}", - self.peek_token().token + tok.token ), - self.peek_token().span.start + tok.span.start )?; } } @@ -14984,7 +16372,7 @@ impl<'a> Parser<'a> { fn parse_concat_pattern(&mut self) -> Result { let mut patterns = vec![self.parse_repetition_pattern()?]; - while !matches!(self.peek_token().token, Token::RParen | Token::Pipe) { + while !matches!(self.peek_token_ref().token, Token::RParen | Token::Pipe) { patterns.push(self.parse_repetition_pattern()?); } match <[MatchRecognizePattern; 1]>::try_from(patterns) { @@ -15011,26 +16399,58 @@ impl<'a> Parser<'a> { /// Parses a the timestamp version specifier (i.e. 
query historical data) pub fn maybe_parse_table_version(&mut self) -> Result, ParserError> { - if self.dialect.supports_timestamp_versioning() { + if self.dialect.supports_table_versioning() { if self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) { let expr = self.parse_expr()?; return Ok(Some(TableVersion::ForSystemTimeAsOf(expr))); + } else if self.peek_keyword(Keyword::CHANGES) { + return self.parse_table_version_changes().map(Some); } else if self.peek_keyword(Keyword::AT) || self.peek_keyword(Keyword::BEFORE) { let func_name = self.parse_object_name(true)?; let func = self.parse_function(func_name)?; return Ok(Some(TableVersion::Function(func))); + } else if self.parse_keywords(&[Keyword::TIMESTAMP, Keyword::AS, Keyword::OF]) { + let expr = self.parse_expr()?; + return Ok(Some(TableVersion::TimestampAsOf(expr))); + } else if self.parse_keywords(&[Keyword::VERSION, Keyword::AS, Keyword::OF]) { + let expr = Expr::Value(self.parse_number_value()?); + return Ok(Some(TableVersion::VersionAsOf(expr))); } } Ok(None) } + /// Parses the Snowflake `CHANGES` clause for change tracking queries. + /// + /// Syntax: + /// ```sql + /// CHANGES (INFORMATION => DEFAULT) + /// AT (TIMESTAMP => ) + /// [END (TIMESTAMP => )] + /// ``` + /// + /// + fn parse_table_version_changes(&mut self) -> Result { + let changes_name = self.parse_object_name(true)?; + let changes = self.parse_function(changes_name)?; + let at_name = self.parse_object_name(true)?; + let at = self.parse_function(at_name)?; + let end = if self.peek_keyword(Keyword::END) { + let end_name = self.parse_object_name(true)?; + Some(self.parse_function(end_name)?) + } else { + None + }; + Ok(TableVersion::Changes { changes, at, end }) + } + /// Parses MySQL's JSON_TABLE column definition. 
/// For example: `id INT EXISTS PATH '$' DEFAULT '0' ON EMPTY ERROR ON ERROR` pub fn parse_json_table_column_def(&mut self) -> Result { if self.parse_keyword(Keyword::NESTED) { let _has_path_keyword = self.parse_keyword(Keyword::PATH); - let path = self.parse_value()?.value; + let path = self.parse_value()?; self.expect_keyword_is(Keyword::COLUMNS)?; let columns = self.parse_parenthesized(|p| { p.parse_comma_separated(Self::parse_json_table_column_def) @@ -15048,7 +16468,7 @@ impl<'a> Parser<'a> { let r#type = self.parse_data_type()?; let exists = self.parse_keyword(Keyword::EXISTS); self.expect_keyword_is(Keyword::PATH)?; - let path = self.parse_value()?.value; + let path = self.parse_value()?; let mut on_empty = None; let mut on_error = None; while let Some(error_handling) = self.parse_json_table_column_error_handling()? { @@ -15105,7 +16525,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::ERROR) { JsonTableColumnErrorHandling::Error } else if self.parse_keyword(Keyword::DEFAULT) { - JsonTableColumnErrorHandling::Default(self.parse_value()?.value) + JsonTableColumnErrorHandling::Default(self.parse_value()?) } else { return Ok(None); }; @@ -15113,6 +16533,7 @@ impl<'a> Parser<'a> { Ok(Some(res)) } + /// Parse a derived table factor (a parenthesized subquery), handling optional LATERAL. pub fn parse_derived_table_factor( &mut self, lateral: IsLateral, @@ -15120,6 +16541,12 @@ impl<'a> Parser<'a> { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; let alias = self.maybe_parse_table_alias()?; + + // Parse optional SAMPLE clause after alias + let sample = self + .maybe_parse_table_sample()? 
+ .map(TableSampleKind::AfterTableAlias); + Ok(TableFactor::Derived { lateral: match lateral { Lateral => true, @@ -15127,23 +16554,10 @@ impl<'a> Parser<'a> { }, subquery, alias, + sample, }) } - fn parse_aliased_function_call(&mut self) -> Result { - let function_name = match self.next_token().token { - Token::Word(w) => Ok(w.value), - _ => self.expected("a function identifier", self.peek_token()), - }?; - let expr = self.parse_function(ObjectName::from(vec![Ident::new(function_name)]))?; - let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier()?) - } else { - None - }; - - Ok(ExprWithAlias { expr, alias }) - } /// Parses an expression with an optional alias /// /// Examples: @@ -15177,12 +16591,40 @@ impl<'a> Parser<'a> { Ok(ExprWithAlias { expr, alias }) } + /// Parse an expression followed by an optional alias; Unlike + /// [Self::parse_expr_with_alias] the "AS" keyword between the expression + /// and the alias is optional. + fn parse_expr_with_alias_optional_as_keyword(&mut self) -> Result { + let expr = self.parse_expr()?; + let alias = self.parse_identifier_optional_alias()?; + Ok(ExprWithAlias { expr, alias }) + } + + /// Parses a plain function call with an optional alias for the `PIVOT` clause + fn parse_pivot_aggregate_function(&mut self) -> Result { + let function_name = match self.next_token().token { + Token::Word(w) => Ok(w.value), + _ => self.expected_ref("a function identifier", self.peek_token_ref()), + }?; + let expr = self.parse_function(ObjectName::from(vec![Ident::new(function_name)]))?; + let alias = { + fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + // ~ for a PIVOT aggregate function the alias must not be a "FOR"; in any dialect + kw != &Keyword::FOR && parser.dialect.is_select_item_alias(explicit, kw, parser) + } + self.parse_optional_alias_inner(None, validator)? 
+ }; + Ok(ExprWithAlias { expr, alias }) + } + + /// Parse a PIVOT table factor (ClickHouse/Oracle style pivot), returning a TableFactor. pub fn parse_pivot_table_factor( &mut self, table: TableFactor, ) -> Result { self.expect_token(&Token::LParen)?; - let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?; + let aggregate_functions = + self.parse_comma_separated(Self::parse_pivot_aggregate_function)?; self.expect_keyword_is(Keyword::FOR)?; let value_column = if self.peek_token_ref().token == Token::LParen { self.parse_parenthesized_column_list_inner(Mandatory, false, |p| { @@ -15204,7 +16646,9 @@ impl<'a> Parser<'a> { } else if self.peek_sub_query() { PivotValueSource::Subquery(self.parse_query()?) } else { - PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) + PivotValueSource::List( + self.parse_comma_separated(Self::parse_expr_with_alias_optional_as_keyword)?, + ) }; self.expect_token(&Token::RParen)?; @@ -15230,6 +16674,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an UNPIVOT table factor, returning a TableFactor. pub fn parse_unpivot_table_factor( &mut self, table: TableFactor, @@ -15263,6 +16708,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a JOIN constraint (`NATURAL`, `ON `, `USING (...)`, or no constraint). pub fn parse_join_constraint(&mut self, natural: bool) -> Result { if natural { Ok(JoinConstraint::Natural) @@ -15274,12 +16720,12 @@ impl<'a> Parser<'a> { Ok(JoinConstraint::Using(columns)) } else { Ok(JoinConstraint::None) - //self.expected("ON, or USING after JOIN", self.peek_token()) + //self.expected_ref("ON, or USING after JOIN", self.peek_token_ref()) } } /// Parse a GRANT statement. 
- pub fn parse_grant(&mut self) -> Result { + pub fn parse_grant(&mut self) -> Result { let (privileges, objects) = self.parse_grant_deny_revoke_privileges_objects()?; self.expect_keyword_is(Keyword::TO)?; @@ -15309,7 +16755,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::Grant { + Ok(Grant { privileges, objects, grantees, @@ -15388,6 +16834,7 @@ impl<'a> Parser<'a> { Ok(values) } + /// Parse privileges and optional target objects for GRANT/DENY/REVOKE statements. pub fn parse_grant_deny_revoke_privileges_objects( &mut self, ) -> Result<(Privileges, Option), ParserError> { @@ -15560,11 +17007,13 @@ impl<'a> Parser<'a> { if let Some(name) = objects?.first() { self.parse_grant_procedure_or_function(name, &kw)? } else { - self.expected("procedure or function name", self.peek_token())? + self.expected_ref("procedure or function name", self.peek_token_ref())? } } Some(Keyword::TABLE) | None => Some(GrantObjects::Tables(objects?)), - _ => unreachable!(), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in grant objects"), + )), } } } else { @@ -15595,10 +17044,11 @@ impl<'a> Parser<'a> { name: name.clone(), arg_types, })), - _ => self.expected("procedure or function keywords", self.peek_token())?, + _ => self.expected_ref("procedure or function keywords", self.peek_token_ref())?, } } + /// Parse a single grantable permission/action (used within GRANT statements). pub fn parse_grant_permission(&mut self) -> Result { fn parse_columns(parser: &mut Parser) -> Result>, ParserError> { let columns = parser.parse_parenthesized_column_list(Optional, false)?; @@ -15715,7 +17165,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::DROP) { Ok(Action::Drop) } else { - self.expected("a privilege keyword", self.peek_token())? + self.expected_ref("a privilege keyword", self.peek_token_ref())? 
} } @@ -15784,7 +17234,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::TAG) { Ok(ActionApplyType::Tag) } else { - self.expected("GRANT APPLY type", self.peek_token()) + self.expected_ref("GRANT APPLY type", self.peek_token_ref()) } } @@ -15820,7 +17270,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::WAREHOUSES) { Ok(ActionManageType::Warehouses) } else { - self.expected("GRANT MANAGE type", self.peek_token()) + self.expected_ref("GRANT MANAGE type", self.peek_token_ref()) } } @@ -15850,6 +17300,7 @@ impl<'a> Parser<'a> { } } + /// Parse a grantee name, possibly with a host qualifier (user@host). pub fn parse_grantee_name(&mut self) -> Result { let mut name = self.parse_object_name(false)?; if self.dialect.supports_user_host_grantee() @@ -15875,7 +17326,7 @@ impl<'a> Parser<'a> { None => { return parser_err!( "DENY statements must specify an object", - self.peek_token().span.start + self.peek_token_ref().span.start ) } }; @@ -15899,7 +17350,7 @@ impl<'a> Parser<'a> { } /// Parse a REVOKE statement - pub fn parse_revoke(&mut self) -> Result { + pub fn parse_revoke(&mut self) -> Result { let (privileges, objects) = self.parse_grant_deny_revoke_privileges_objects()?; self.expect_keyword_is(Keyword::FROM)?; @@ -15913,7 +17364,7 @@ impl<'a> Parser<'a> { let cascade = self.parse_cascade_option(); - Ok(Statement::Revoke { + Ok(Revoke { privileges, objects, grantees, @@ -15930,7 +17381,7 @@ impl<'a> Parser<'a> { if !dialect_of!(self is MySqlDialect | GenericDialect) { return parser_err!( "Unsupported statement REPLACE", - self.peek_token().span.start + self.peek_token_ref().span.start ); } @@ -15954,6 +17405,7 @@ impl<'a> Parser<'a> { /// Parse an INSERT statement pub fn parse_insert(&mut self, insert_token: TokenWithSpan) -> Result { + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let or = self.parse_conflict_clause(); let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) { None @@ -15997,22 +17449,38 @@ 
impl<'a> Parser<'a> { let table = self.parse_keyword(Keyword::TABLE); let table_object = self.parse_table_object()?; - let table_alias = - if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier()?) + let table_alias = if self.dialect.supports_insert_table_alias() + && !self.peek_sub_query() + && self + .peek_one_of_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) + .is_none() + { + if self.parse_keyword(Keyword::AS) { + Some(TableAliasWithoutColumns { + explicit: true, + alias: self.parse_identifier()?, + }) } else { - None - }; + self.maybe_parse(|parser| parser.parse_identifier())? + .map(|alias| TableAliasWithoutColumns { + explicit: false, + alias, + }) + } + } else { + None + }; let is_mysql = dialect_of!(self is MySqlDialect); - let (columns, partitioned, after_columns, source, assignments) = if self + let (columns, partitioned, after_columns, output, source, assignments) = if self .parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { - (vec![], None, vec![], None, vec![]) + (vec![], None, vec![], None, None, vec![]) } else { let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + let columns = + self.parse_parenthesized_qualified_column_list(Optional, is_mysql)?; let partitioned = self.parse_insert_partition()?; // Hive allows you to specify columns after partitions as well if you want. 
@@ -16026,6 +17494,8 @@ impl<'a> Parser<'a> { Default::default() }; + let output = self.maybe_parse_output_clause()?; + let (source, assignments) = if self.peek_keyword(Keyword::FORMAT) || self.peek_keyword(Keyword::SETTINGS) { @@ -16036,7 +17506,14 @@ impl<'a> Parser<'a> { (Some(self.parse_query()?), vec![]) }; - (columns, partitioned, after_columns, source, assignments) + ( + columns, + partitioned, + after_columns, + output, + source, + assignments, + ) }; let (format_clause, settings) = if self.dialect.supports_insert_format() { @@ -16073,7 +17550,7 @@ impl<'a> Parser<'a> { let conflict_target = if self.parse_keywords(&[Keyword::ON, Keyword::CONSTRAINT]) { Some(ConflictTarget::OnConstraint(self.parse_object_name(false)?)) - } else if self.peek_token() == Token::LParen { + } else if self.peek_token_ref().token == Token::LParen { Some(ConflictTarget::Columns( self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, )) @@ -16121,8 +17598,9 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::Insert(Insert { + Ok(Insert { insert_token: insert_token.into(), + optimizer_hints, or, table: table_object, table_alias, @@ -16137,18 +17615,24 @@ impl<'a> Parser<'a> { has_table_keyword: table, on, returning, + output, replace_into, priority, insert_alias, settings, format_clause, - })) + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, + } + .into()) } } - // Parses input format clause used for [ClickHouse]. - // - // + /// Parses input format clause used for ClickHouse. + /// + /// pub fn parse_input_format_clause(&mut self) -> Result { let ident = self.parse_identifier()?; let values = self @@ -16161,9 +17645,44 @@ impl<'a> Parser<'a> { /// Returns true if the immediate tokens look like the /// beginning of a subquery. 
`(SELECT ...` fn peek_subquery_start(&mut self) -> bool { - let [maybe_lparen, maybe_select] = self.peek_tokens(); - Token::LParen == maybe_lparen - && matches!(maybe_select, Token::Word(w) if w.keyword == Keyword::SELECT) + matches!( + self.peek_tokens_ref(), + [ + TokenWithSpan { + token: Token::LParen, + .. + }, + TokenWithSpan { + token: Token::Word(Word { + keyword: Keyword::SELECT, + .. + }), + .. + }, + ] + ) + } + + /// Returns true if the immediate tokens look like the + /// beginning of a subquery possibly preceded by CTEs; + /// i.e. `(WITH ...` or `(SELECT ...`. + fn peek_subquery_or_cte_start(&mut self) -> bool { + matches!( + self.peek_tokens_ref(), + [ + TokenWithSpan { + token: Token::LParen, + .. + }, + TokenWithSpan { + token: Token::Word(Word { + keyword: Keyword::SELECT | Keyword::WITH, + .. + }), + .. + }, + ] + ) } fn parse_conflict_clause(&mut self) -> Option { @@ -16184,6 +17703,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional `PARTITION (...)` clause for INSERT statements. pub fn parse_insert_partition(&mut self) -> Result>, ParserError> { if self.parse_keyword(Keyword::PARTITION) { self.expect_token(&Token::LParen)?; @@ -16195,6 +17715,7 @@ impl<'a> Parser<'a> { } } + /// Parse optional Hive `INPUTFORMAT ... SERDE ...` clause used by LOAD DATA. pub fn parse_load_data_table_format( &mut self, ) -> Result, ParserError> { @@ -16221,7 +17742,9 @@ impl<'a> Parser<'a> { Ok(Box::new(SetExpr::Update(self.parse_update(update_token)?))) } + /// Parse an `UPDATE` statement and return `Statement::Update`. 
pub fn parse_update(&mut self, update_token: TokenWithSpan) -> Result { + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let or = self.parse_conflict_clause(); let table = self.parse_table_and_joins()?; let from_before_set = if self.parse_keyword(Keyword::FROM) { @@ -16233,6 +17756,9 @@ impl<'a> Parser<'a> { }; self.expect_keyword(Keyword::SET)?; let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + + let output = self.maybe_parse_output_clause()?; + let from = if from_before_set.is_none() && self.parse_keyword(Keyword::FROM) { Some(UpdateTableFromKind::AfterSet( self.parse_table_with_joins()?, @@ -16257,11 +17783,13 @@ impl<'a> Parser<'a> { }; Ok(Update { update_token: update_token.into(), + optimizer_hints, table, assignments, from, selection, returning, + output, or, limit, } @@ -16288,6 +17816,7 @@ impl<'a> Parser<'a> { } } + /// Parse a single function argument, handling named and unnamed variants. pub fn parse_function_args(&mut self) -> Result { let arg = if self.dialect.supports_named_fn_args_with_expr_name() { self.maybe_parse(|p| { @@ -16315,7 +17844,26 @@ impl<'a> Parser<'a> { if let Some(arg) = arg { return Ok(arg); } - Ok(FunctionArg::Unnamed(self.parse_wildcard_expr()?.into())) + let wildcard_expr = self.parse_wildcard_expr()?; + let arg_expr: FunctionArgExpr = match wildcard_expr { + Expr::Wildcard(ref token) if self.dialect.supports_select_wildcard_exclude() => { + // Support `* EXCLUDE(col1, col2, ...)` inside function calls (e.g. Snowflake's + // `HASH(* EXCLUDE(col))`). Parse the options the same way SELECT items do. 
+ let opts = self.parse_wildcard_additional_options(token.0.clone())?; + if opts.opt_exclude.is_some() + || opts.opt_except.is_some() + || opts.opt_replace.is_some() + || opts.opt_rename.is_some() + || opts.opt_ilike.is_some() + { + FunctionArgExpr::WildcardWithOptions(opts) + } else { + wildcard_expr.into() + } + } + other => other.into(), + }; + Ok(FunctionArg::Unnamed(arg_expr)) } fn parse_function_named_arg_operator(&mut self) -> Result { @@ -16347,6 +17895,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional, comma-separated list of function arguments (consumes closing paren). pub fn parse_optional_args(&mut self) -> Result, ParserError> { if self.consume_token(&Token::RParen) { Ok(vec![]) @@ -16434,7 +17983,9 @@ impl<'a> Parser<'a> { let kind = match self.expect_one_of_keywords(&[Keyword::MIN, Keyword::MAX])? { Keyword::MIN => HavingBoundKind::Min, Keyword::MAX => HavingBoundKind::Max, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in having bound"), + )), }; clauses.push(FunctionArgumentClause::Having(HavingBound( kind, @@ -16445,7 +17996,7 @@ impl<'a> Parser<'a> { if dialect_of!(self is GenericDialect | MySqlDialect) && self.parse_keyword(Keyword::SEPARATOR) { - clauses.push(FunctionArgumentClause::Separator(self.parse_value()?.value)); + clauses.push(FunctionArgumentClause::Separator(self.parse_value()?)); } if let Some(on_overflow) = self.parse_listagg_on_overflow()? 
{ @@ -16492,7 +18043,7 @@ impl<'a> Parser<'a> { } fn parse_duplicate_treatment(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; + let loc = self.peek_token_ref().span.start; match ( self.parse_keyword(Keyword::ALL), self.parse_keyword(Keyword::DISTINCT), @@ -16524,7 +18075,7 @@ impl<'a> Parser<'a> { Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => { parser_err!( format!("Expected an expression, found: {}", v), - self.peek_token().span.start + self.peek_token_ref().span.start ) } Expr::BinaryOp { @@ -16537,7 +18088,7 @@ impl<'a> Parser<'a> { let Expr::Identifier(alias) = *left else { return parser_err!( "BUG: expected identifier expression as alias", - self.peek_token().span.start + self.peek_token_ref().span.start ); }; Ok(SelectItem::ExprWithAlias { @@ -16573,7 +18124,7 @@ impl<'a> Parser<'a> { &mut self, wildcard_token: TokenWithSpan, ) -> Result { - let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + let opt_ilike = if self.dialect.supports_select_wildcard_ilike() { self.parse_optional_select_item_ilike()? } else { None @@ -16589,18 +18140,27 @@ impl<'a> Parser<'a> { } else { None }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) - { + let opt_replace = if self.dialect.supports_select_wildcard_replace() { self.parse_optional_select_item_replace()? } else { None }; - let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + let opt_rename = if self.dialect.supports_select_wildcard_rename() { self.parse_optional_select_item_rename()? } else { None }; + let opt_alias = if self.dialect.supports_select_wildcard_with_alias() { + if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) 
+ } else { + None + } + } else { + None + }; + Ok(WildcardAdditionalOptions { wildcard_token: wildcard_token.into(), opt_ilike, @@ -16608,6 +18168,7 @@ impl<'a> Parser<'a> { opt_except, opt_rename, opt_replace, + opt_alias, }) } @@ -16638,11 +18199,12 @@ impl<'a> Parser<'a> { ) -> Result, ParserError> { let opt_exclude = if self.parse_keyword(Keyword::EXCLUDE) { if self.consume_token(&Token::LParen) { - let columns = self.parse_comma_separated(|parser| parser.parse_identifier())?; + let columns = + self.parse_comma_separated(|parser| parser.parse_object_name(false))?; self.expect_token(&Token::RParen)?; Some(ExcludeSelectItem::Multiple(columns)) } else { - let column = self.parse_identifier()?; + let column = self.parse_object_name(false)?; Some(ExcludeSelectItem::Single(column)) } } else { @@ -16659,13 +18221,13 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { let idents = self.parse_parenthesized_column_list(Mandatory, false)?; match &idents[..] { [] => { - return self.expected( + return self.expected_ref( "at least one column should be parsed by the expect clause", - self.peek_token(), + self.peek_token_ref(), )?; } [first, idents @ ..] => Some(ExceptSelectItem { @@ -16730,6 +18292,7 @@ impl<'a> Parser<'a> { Ok(opt_replace) } + /// Parse a single element of a `REPLACE (...)` select-item clause. 
pub fn parse_replace_elements(&mut self) -> Result { let expr = self.parse_expr()?; let as_keyword = self.parse_keyword(Keyword::AS); @@ -16771,10 +18334,10 @@ impl<'a> Parser<'a> { fn parse_order_by_expr_inner( &mut self, with_operator_class: bool, - ) -> Result<(OrderByExpr, Option), ParserError> { + ) -> Result<(OrderByExpr, Option), ParserError> { let expr = self.parse_expr()?; - let operator_class: Option = if with_operator_class { + let operator_class: Option = if with_operator_class { // We check that if non of the following keywords are present, then we parse an // identifier as operator class. if self @@ -16783,7 +18346,7 @@ impl<'a> Parser<'a> { { None } else { - self.maybe_parse(|parser| parser.parse_identifier())? + self.maybe_parse(|parser| parser.parse_object_name(false))? } } else { None @@ -16791,7 +18354,7 @@ impl<'a> Parser<'a> { let options = self.parse_order_by_options()?; - let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) + let with_fill = if self.dialect.supports_with_fill() && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) { Some(self.parse_with_fill()?) @@ -16825,6 +18388,7 @@ impl<'a> Parser<'a> { // Parse a WITH FILL clause (ClickHouse dialect) // that follow the WITH FILL keywords in a ORDER BY clause + /// Parse a `WITH FILL` clause used in ORDER BY (ClickHouse dialect). pub fn parse_with_fill(&mut self) -> Result { let from = if self.parse_keyword(Keyword::FROM) { Some(self.parse_expr()?) 
@@ -16847,8 +18411,8 @@ impl<'a> Parser<'a> { Ok(WithFill { from, to, step }) } - // Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect) - // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier + /// Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect) + /// that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier pub fn parse_interpolations(&mut self) -> Result, ParserError> { if !self.parse_keyword(Keyword::INTERPOLATE) { return Ok(None); @@ -16868,7 +18432,7 @@ impl<'a> Parser<'a> { Ok(Some(Interpolate { exprs: None })) } - // Parse a INTERPOLATE expression (ClickHouse dialect) + /// Parse a INTERPOLATE expression (ClickHouse dialect) pub fn parse_interpolation(&mut self) -> Result { let column = self.parse_identifier()?; let expr = if self.parse_keyword(Keyword::AS) { @@ -16962,7 +18526,9 @@ impl<'a> Parser<'a> { let lock_type = match self.expect_one_of_keywords(&[Keyword::UPDATE, Keyword::SHARE])? { Keyword::UPDATE => LockType::Update, Keyword::SHARE => LockType::Share, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{UPDATE, SHARE}}, got {unexpected_keyword:?}"), + )), }; let of = if self.parse_keyword(Keyword::OF) { Some(self.parse_object_name(false)?) @@ -16983,6 +18549,67 @@ impl<'a> Parser<'a> { }) } + /// Parse a PostgreSQL `LOCK` statement. 
+ pub fn parse_lock_statement(&mut self) -> Result { + self.expect_keyword(Keyword::LOCK)?; + + if self.peek_keyword(Keyword::TABLES) { + return self.expected_ref("TABLE or a table name", self.peek_token_ref()); + } + + let _ = self.parse_keyword(Keyword::TABLE); + let tables = self.parse_comma_separated(Parser::parse_lock_table_target)?; + let lock_mode = if self.parse_keyword(Keyword::IN) { + let lock_mode = self.parse_lock_table_mode()?; + self.expect_keyword(Keyword::MODE)?; + Some(lock_mode) + } else { + None + }; + let nowait = self.parse_keyword(Keyword::NOWAIT); + + Ok(Lock { + tables, + lock_mode, + nowait, + }) + } + + fn parse_lock_table_target(&mut self) -> Result { + let only = self.parse_keyword(Keyword::ONLY); + let name = self.parse_object_name(false)?; + let has_asterisk = self.consume_token(&Token::Mul); + + Ok(LockTableTarget { + name, + only, + has_asterisk, + }) + } + + fn parse_lock_table_mode(&mut self) -> Result { + if self.parse_keywords(&[Keyword::ACCESS, Keyword::SHARE]) { + Ok(LockTableMode::AccessShare) + } else if self.parse_keywords(&[Keyword::ACCESS, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::AccessExclusive) + } else if self.parse_keywords(&[Keyword::ROW, Keyword::SHARE]) { + Ok(LockTableMode::RowShare) + } else if self.parse_keywords(&[Keyword::ROW, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::RowExclusive) + } else if self.parse_keywords(&[Keyword::SHARE, Keyword::UPDATE, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::ShareUpdateExclusive) + } else if self.parse_keywords(&[Keyword::SHARE, Keyword::ROW, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::ShareRowExclusive) + } else if self.parse_keyword(Keyword::SHARE) { + Ok(LockTableMode::Share) + } else if self.parse_keyword(Keyword::EXCLUSIVE) { + Ok(LockTableMode::Exclusive) + } else { + self.expected_ref("a PostgreSQL LOCK TABLE mode", self.peek_token_ref()) + } + } + + /// Parse a VALUES clause pub fn parse_values( &mut self, allow_empty: bool, @@ -17012,6 +18639,7 @@ impl<'a> Parser<'a> { 
}) } + /// Parse a 'START TRANSACTION' statement pub fn parse_start_transaction(&mut self) -> Result { self.expect_keyword_is(Keyword::TRANSACTION)?; Ok(Statement::StartTransaction { @@ -17025,8 +18653,9 @@ impl<'a> Parser<'a> { }) } - pub fn parse_begin(&mut self) -> Result { - let modifier = if !self.dialect.supports_start_transaction_modifier() { + /// Parse a transaction modifier keyword that can follow a `BEGIN` statement. + pub(crate) fn parse_transaction_modifier(&mut self) -> Option { + if !self.dialect.supports_start_transaction_modifier() { None } else if self.parse_keyword(Keyword::DEFERRED) { Some(TransactionModifier::Deferred) @@ -17040,12 +18669,20 @@ impl<'a> Parser<'a> { Some(TransactionModifier::Catch) } else { None - }; - let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) { - Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), - Some(Keyword::WORK) => Some(BeginTransactionKind::Work), - _ => None, - }; + } + } + + /// Parse a 'BEGIN' statement + pub fn parse_begin(&mut self) -> Result { + let modifier = self.parse_transaction_modifier(); + let transaction = + match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK, Keyword::TRAN]) + { + Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), + Some(Keyword::WORK) => Some(BeginTransactionKind::Work), + Some(Keyword::TRAN) => Some(BeginTransactionKind::Tran), + _ => None, + }; Ok(Statement::StartTransaction { modes: self.parse_transaction_modes()?, begin: true, @@ -17057,6 +18694,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a 'BEGIN ... EXCEPTION ... 
END' block pub fn parse_begin_exception_end(&mut self) -> Result { let statements = self.parse_statement_list(&[Keyword::EXCEPTION, Keyword::END])?; @@ -17102,6 +18740,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an 'END' statement pub fn parse_end(&mut self) -> Result { let modifier = if !self.dialect.supports_end_transaction_modifier() { None @@ -17119,6 +18758,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a list of transaction modes pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { let mut modes = vec![]; let mut required = false; @@ -17135,7 +18775,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SNAPSHOT) { TransactionIsolationLevel::Snapshot } else { - self.expected("isolation level", self.peek_token())? + self.expected_ref("isolation level", self.peek_token_ref())? }; TransactionMode::IsolationLevel(iso_level) } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { @@ -17143,7 +18783,7 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) } else if required { - self.expected("transaction mode", self.peek_token())? + self.expected_ref("transaction mode", self.peek_token_ref())? 
} else { break; }; @@ -17157,6 +18797,7 @@ impl<'a> Parser<'a> { Ok(modes) } + /// Parse a 'COMMIT' statement pub fn parse_commit(&mut self) -> Result { Ok(Statement::Commit { chain: self.parse_commit_rollback_chain()?, @@ -17165,6 +18806,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a 'ROLLBACK' statement pub fn parse_rollback(&mut self) -> Result { let chain = self.parse_commit_rollback_chain()?; let savepoint = self.parse_rollback_savepoint()?; @@ -17172,8 +18814,9 @@ impl<'a> Parser<'a> { Ok(Statement::Rollback { chain, savepoint }) } + /// Parse an optional `AND [NO] CHAIN` clause for `COMMIT` and `ROLLBACK` statements pub fn parse_commit_rollback_chain(&mut self) -> Result { - let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); + let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK, Keyword::TRAN]); if self.parse_keyword(Keyword::AND) { let chain = !self.parse_keyword(Keyword::NO); self.expect_keyword_is(Keyword::CHAIN)?; @@ -17183,6 +18826,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional 'TO SAVEPOINT savepoint_name' clause for ROLLBACK statements pub fn parse_rollback_savepoint(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::TO) { let _ = self.parse_keyword(Keyword::SAVEPOINT); @@ -17222,35 +18866,77 @@ impl<'a> Parser<'a> { }) } + /// Parse a single `RAISERROR` option pub fn parse_raiserror_option(&mut self) -> Result { match self.expect_one_of_keywords(&[Keyword::LOG, Keyword::NOWAIT, Keyword::SETERROR])? { Keyword::LOG => Ok(RaisErrorOption::Log), Keyword::NOWAIT => Ok(RaisErrorOption::NoWait), Keyword::SETERROR => Ok(RaisErrorOption::SetError), - _ => self.expected( + _ => self.expected_ref( "LOG, NOWAIT OR SETERROR raiserror option", - self.peek_token(), + self.peek_token_ref(), ), } } + /// Parse a MSSQL `THROW` statement. 
+ /// + /// See [Statement::Throw] + pub fn parse_throw(&mut self) -> Result { + self.expect_keyword_is(Keyword::THROW)?; + + let error_number = self.maybe_parse(|p| p.parse_expr().map(Box::new))?; + let (message, state) = if error_number.is_some() { + self.expect_token(&Token::Comma)?; + let message = Box::new(self.parse_expr()?); + self.expect_token(&Token::Comma)?; + let state = Box::new(self.parse_expr()?); + (Some(message), Some(state)) + } else { + (None, None) + }; + + Ok(ThrowStatement { + error_number, + message, + state, + }) + } + + /// Parse a SQL `DEALLOCATE` statement pub fn parse_deallocate(&mut self) -> Result { let prepare = self.parse_keyword(Keyword::PREPARE); let name = self.parse_identifier()?; Ok(Statement::Deallocate { name, prepare }) } + /// Parse a SQL `EXECUTE` statement pub fn parse_execute(&mut self) -> Result { + // Track whether the procedure/expression name itself was wrapped in parens, + // i.e. `EXEC (@sql)` (dynamic string execution) vs `EXEC sp_name`. + // When the name has parens there are no additional parameters. let name = if self.dialect.supports_execute_immediate() && self.parse_keyword(Keyword::IMMEDIATE) { None } else { + let has_parentheses = self.consume_token(&Token::LParen); let name = self.parse_object_name(false)?; - Some(name) + if has_parentheses { + self.expect_token(&Token::RParen)?; + } + Some((name, has_parentheses)) }; - let has_parentheses = self.consume_token(&Token::LParen); + let name_had_parentheses = name.as_ref().map(|(_, p)| *p).unwrap_or(false); + + // Only look for a parameter list when the name was NOT wrapped in parens. + // `EXEC (@sql)` is dynamic SQL execution and takes no parameters here. 
+ let has_parentheses = if name_had_parentheses { + false + } else { + self.consume_token(&Token::LParen) + }; let end_kws = &[Keyword::USING, Keyword::OUTPUT, Keyword::DEFAULT]; let end_token = match (has_parentheses, self.peek_token().token) { @@ -17260,12 +18946,18 @@ impl<'a> Parser<'a> { (false, _) => Token::SemiColon, }; - let parameters = self.parse_comma_separated0(Parser::parse_expr, end_token)?; + let parameters = if name_had_parentheses { + vec![] + } else { + self.parse_comma_separated0(Parser::parse_expr, end_token)? + }; if has_parentheses { self.expect_token(&Token::RParen)?; } + let name = name.map(|(n, _)| n); + let into = if self.parse_keyword(Keyword::INTO) { self.parse_comma_separated(Self::parse_identifier)? } else { @@ -17294,6 +18986,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a SQL `PREPARE` statement pub fn parse_prepare(&mut self) -> Result { let name = self.parse_identifier()?; @@ -17312,15 +19005,16 @@ impl<'a> Parser<'a> { }) } + /// Parse a SQL `UNLOAD` statement pub fn parse_unload(&mut self) -> Result { self.expect_keyword(Keyword::UNLOAD)?; self.expect_token(&Token::LParen)?; - let (query, query_text) = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) - { - (None, Some(self.parse_literal_string()?)) - } else { - (Some(self.parse_query()?), None) - }; + let (query, query_text) = + if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { + (None, Some(self.parse_literal_string()?)) + } else { + (Some(self.parse_query()?), None) + }; self.expect_token(&Token::RParen)?; self.expect_keyword_is(Keyword::TO)?; @@ -17345,153 +19039,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { - let mut clauses = vec![]; - loop { - if !(self.parse_keyword(Keyword::WHEN)) { - break; - } - let when_token = self.get_current_token().clone(); - - let mut clause_kind = MergeClauseKind::Matched; - if self.parse_keyword(Keyword::NOT) { - clause_kind = MergeClauseKind::NotMatched; 
- } - self.expect_keyword_is(Keyword::MATCHED)?; - - if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) - { - clause_kind = MergeClauseKind::NotMatchedBySource; - } else if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) - { - clause_kind = MergeClauseKind::NotMatchedByTarget; - } - - let predicate = if self.parse_keyword(Keyword::AND) { - Some(self.parse_expr()?) - } else { - None - }; - - self.expect_keyword_is(Keyword::THEN)?; - - let merge_clause = match self.parse_one_of_keywords(&[ - Keyword::UPDATE, - Keyword::INSERT, - Keyword::DELETE, - ]) { - Some(Keyword::UPDATE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - } - - let update_token = self.get_current_token().clone(); - self.expect_keyword_is(Keyword::SET)?; - MergeAction::Update { - update_token: update_token.into(), - assignments: self.parse_comma_separated(Parser::parse_assignment)?, - } - } - Some(Keyword::DELETE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("DELETE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let delete_token = self.get_current_token().clone(); - MergeAction::Delete { - delete_token: delete_token.into(), - } - } - Some(Keyword::INSERT) => { - if !matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("INSERT is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let insert_token = self.get_current_token().clone(); - let is_mysql = dialect_of!(self is MySqlDialect); - - let columns = 
self.parse_parenthesized_column_list(Optional, is_mysql)?; - let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::ROW) - { - (MergeInsertKind::Row, self.get_current_token().clone()) - } else { - self.expect_keyword_is(Keyword::VALUES)?; - let values_token = self.get_current_token().clone(); - let values = self.parse_values(is_mysql, false)?; - (MergeInsertKind::Values(values), values_token) - }; - MergeAction::Insert(MergeInsertExpr { - insert_token: insert_token.into(), - columns, - kind_token: kind_token.into(), - kind, - }) - } - _ => { - return parser_err!( - "expected UPDATE, DELETE or INSERT in merge clause", - self.peek_token_ref().span.start - ); - } - }; - clauses.push(MergeClause { - when_token: when_token.into(), - clause_kind, - predicate, - action: merge_clause, - }); - } - Ok(clauses) - } - - fn parse_output( - &mut self, - start_keyword: Keyword, - start_token: TokenWithSpan, - ) -> Result { - let select_items = self.parse_projection()?; - let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { - self.expect_keyword_is(Keyword::INTO)?; - Some(self.parse_select_into()?) 
- } else { - None - }; - - Ok(if start_keyword == Keyword::OUTPUT { - OutputClause::Output { - output_token: start_token.into(), - select_items, - into_table, - } - } else { - OutputClause::Returning { - returning_token: start_token.into(), - select_items, - } - }) - } - fn parse_select_into(&mut self) -> Result { let temporary = self .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) @@ -17508,46 +19055,21 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { - let into = self.parse_keyword(Keyword::INTO); - - let table = self.parse_table_factor()?; - - self.expect_keyword_is(Keyword::USING)?; - let source = self.parse_table_factor()?; - self.expect_keyword_is(Keyword::ON)?; - let on = self.parse_expr()?; - let clauses = self.parse_merge_clauses()?; - let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { - Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), - None => None, - }; - - Ok(Statement::Merge { - merge_token: merge_token.into(), - into, - table, - source, - on: Box::new(on), - clauses, - output, - }) - } - - fn parse_pragma_value(&mut self) -> Result { - match self.parse_value()?.value { - v @ Value::SingleQuotedString(_) => Ok(v), - v @ Value::DoubleQuotedString(_) => Ok(v), - v @ Value::Number(_, _) => Ok(v), - v @ Value::Placeholder(_) => Ok(v), + fn parse_pragma_value(&mut self) -> Result { + let v = self.parse_value()?; + match &v.value { + Value::SingleQuotedString(_) => Ok(v), + Value::DoubleQuotedString(_) => Ok(v), + Value::Number(_, _) => Ok(v), + Value::Placeholder(_) => Ok(v), _ => { self.prev_token(); - self.expected("number or string or ? placeholder", self.peek_token()) + self.expected_ref("number or string or ? 
placeholder", self.peek_token_ref()) } } } - // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] + /// PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] pub fn parse_pragma(&mut self) -> Result { let name = self.parse_object_name(false)?; if self.consume_token(&Token::LParen) { @@ -17604,20 +19126,30 @@ impl<'a> Parser<'a> { table_format, }) } else { - self.expected( + self.expected_ref( "`DATA` or an extension name after `LOAD`", - self.peek_token(), + self.peek_token_ref(), ) } } + /// ClickHouse: /// ```sql /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] /// ``` /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) + /// + /// Databricks: + /// ```sql + /// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])] + /// ``` + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html) pub fn parse_optimize_table(&mut self) -> Result { - self.expect_keyword_is(Keyword::TABLE)?; + let has_table_keyword = self.parse_keyword(Keyword::TABLE); + let name = self.parse_object_name(false)?; + + // ClickHouse-specific options let on_cluster = self.parse_optional_on_cluster()?; let partition = if self.parse_keyword(Keyword::PARTITION) { @@ -17631,6 +19163,7 @@ impl<'a> Parser<'a> { }; let include_final = self.parse_keyword(Keyword::FINAL); + let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) { if self.parse_keyword(Keyword::BY) { Some(Deduplicate::ByExpression(self.parse_expr()?)) @@ -17641,12 +19174,31 @@ impl<'a> Parser<'a> { None }; + // Databricks-specific options + let predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + let zorder = if self.parse_keywords(&[Keyword::ZORDER, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Some(columns) + } else { + None + }; + Ok(Statement::OptimizeTable { name, + has_table_keyword, on_cluster, partition, include_final, deduplicate, + predicate, + zorder, }) } @@ -17779,6 +19331,7 @@ impl<'a> Parser<'a> { self.index } + /// Parse a named window definition. pub fn parse_named_window(&mut self) -> Result { let ident = self.parse_identifier()?; self.expect_keyword_is(Keyword::AS)?; @@ -17788,12 +19341,13 @@ impl<'a> Parser<'a> { } else if self.dialect.supports_window_clause_named_window_reference() { NamedWindowExpr::NamedWindow(self.parse_identifier()?) } else { - return self.expected("(", self.peek_token()); + return self.expected_ref("(", self.peek_token_ref()); }; Ok(NamedWindowDefinition(ident, window_expr)) } + /// Parse `CREATE PROCEDURE` statement. pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { let name = self.parse_object_name(false)?; let params = self.parse_optional_procedure_parameters()?; @@ -17817,8 +19371,9 @@ impl<'a> Parser<'a> { }) } + /// Parse a window specification. pub fn parse_window_spec(&mut self) -> Result { - let window_name = match self.peek_token().token { + let window_name = match &self.peek_token_ref().token { Token::Word(word) if word.keyword == Keyword::NoKeyword => { self.parse_optional_ident()? } @@ -17851,6 +19406,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `CREATE TYPE` statement. 
pub fn parse_create_type(&mut self) -> Result { let name = self.parse_object_name(false)?; @@ -17887,7 +19443,7 @@ impl<'a> Parser<'a> { // CREATE TYPE name AS (attributes) - Composite self.parse_create_type_composite(name) } else { - self.expected("ENUM, RANGE, or '(' after AS", self.peek_token()) + self.expected_ref("ENUM, RANGE, or '(' after AS", self.peek_token_ref()) } } @@ -18002,7 +19558,7 @@ impl<'a> Parser<'a> { let name = self.parse_object_name(false)?; Ok(UserDefinedTypeRangeOption::MultirangeTypeName(name)) } - _ => self.expected("range option keyword", self.peek_token()), + _ => self.expected_ref("range option keyword", self.peek_token_ref()), } } @@ -18115,9 +19671,9 @@ impl<'a> Parser<'a> { Some(Keyword::DOUBLE) => Ok(UserDefinedTypeSqlDefinitionOption::Alignment( Alignment::Double, )), - _ => self.expected( + _ => self.expected_ref( "alignment value (char, int2, int4, or double)", - self.peek_token(), + self.peek_token_ref(), ), } } @@ -18142,9 +19698,9 @@ impl<'a> Parser<'a> { Some(Keyword::MAIN) => Ok(UserDefinedTypeSqlDefinitionOption::Storage( UserDefinedTypeStorage::Main, )), - _ => self.expected( + _ => self.expected_ref( "storage value (plain, external, extended, or main)", - self.peek_token(), + self.peek_token_ref(), ), } } @@ -18190,7 +19746,7 @@ impl<'a> Parser<'a> { self.parse_keyword(Keyword::TRUE) || !self.parse_keyword(Keyword::FALSE); Ok(UserDefinedTypeSqlDefinitionOption::Collatable(value)) } - _ => self.expected("SQL definition option keyword", self.peek_token()), + _ => self.expected_ref("SQL definition option keyword", self.peek_token_ref()), } } @@ -18223,6 +19779,21 @@ impl<'a> Parser<'a> { })) } + /// Parse [Statement::WaitFor] + /// + /// See: + fn parse_waitfor(&mut self) -> Result { + let wait_type = if self.parse_keyword(Keyword::DELAY) { + WaitForType::Delay + } else if self.parse_keyword(Keyword::TIME) { + WaitForType::Time + } else { + return self.expected_ref("DELAY or TIME", self.peek_token_ref()); + }; + let expr = 
self.parse_expr()?; + Ok(Statement::WaitFor(WaitForStatement { wait_type, expr })) + } + /// Parse [Statement::Return] fn parse_return(&mut self) -> Result { match self.maybe_parse(|p| p.parse_expr())? { @@ -18270,7 +19841,7 @@ impl<'a> Parser<'a> { let threshold = if self.parse_keyword(Keyword::TO) { let value = self.parse_value()?; self.expect_keyword(Keyword::PERCENT)?; - Some(value.value) + Some(value) } else { None }; @@ -18298,14 +19869,8 @@ impl<'a> Parser<'a> { /// Returns true if the next keyword indicates a sub query, i.e. SELECT or WITH fn peek_sub_query(&mut self) -> bool { - if self - .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + self.peek_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) .is_some() - { - self.prev_token(); - return true; - } - false } pub(crate) fn parse_show_stmt_options(&mut self) -> Result { @@ -18339,7 +19904,7 @@ impl<'a> Parser<'a> { Some(Keyword::FROM) => ShowStatementInClause::FROM, Some(Keyword::IN) => ShowStatementInClause::IN, None => return Ok(None), - _ => return self.expected("FROM or IN", self.peek_token()), + _ => return self.expected_ref("FROM or IN", self.peek_token_ref()), }; let (parent_type, parent_name) = match self.parse_one_of_keywords(&[ @@ -18374,9 +19939,9 @@ impl<'a> Parser<'a> { Keyword::TABLE => (Some(ShowStatementInParentType::Table), parent_name), Keyword::VIEW => (Some(ShowStatementInParentType::View), parent_name), _ => { - return self.expected( + return self.expected_ref( "one of ACCOUNT, DATABASE, SCHEMA, TABLE or VIEW", - self.peek_token(), + self.peek_token_ref(), ) } } @@ -18404,9 +19969,9 @@ impl<'a> Parser<'a> { })) } - fn maybe_parse_show_stmt_starts_with(&mut self) -> Result, ParserError> { + fn maybe_parse_show_stmt_starts_with(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::STARTS, Keyword::WITH]) { - Ok(Some(self.parse_value()?.value)) + Ok(Some(self.parse_value()?)) } else { Ok(None) } @@ -18420,9 +19985,9 @@ impl<'a> Parser<'a> { } } - fn 
maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { + fn maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::FROM) { - Ok(Some(self.parse_value()?.value)) + Ok(Some(self.parse_value()?)) } else { Ok(None) } @@ -18452,10 +20017,10 @@ impl<'a> Parser<'a> { if parenthesized { break; } else { - return self.expected(" another option or EOF", self.peek_token()); + return self.expected_ref(" another option or EOF", self.peek_token_ref()); } } - Token::EOF => break, + Token::EOF | Token::SemiColon => break, Token::Comma => { delimiter = KeyValueOptionsDelimiter::Comma; continue; @@ -18467,7 +20032,12 @@ impl<'a> Parser<'a> { self.prev_token(); break; } - _ => return self.expected("another option, EOF, Comma or ')'", self.peek_token()), + _ => { + return self.expected_ref( + "another option, EOF, SemiColon, Comma or ')'", + self.peek_token_ref(), + ) + } }; } @@ -18480,30 +20050,31 @@ impl<'a> Parser<'a> { key: &Word, ) -> Result { self.expect_token(&Token::Eq)?; - match self.peek_token().token { + let peeked_token = self.peek_token(); + match peeked_token.token { Token::SingleQuotedString(_) => Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(self.parse_value()?.into()), + option_value: KeyValueOptionKind::Single(self.parse_value()?), }), Token::Word(word) if word.keyword == Keyword::TRUE || word.keyword == Keyword::FALSE => { Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(self.parse_value()?.into()), + option_value: KeyValueOptionKind::Single(self.parse_value()?), }) } Token::Number(..) 
=> Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(self.parse_value()?.into()), + option_value: KeyValueOptionKind::Single(self.parse_value()?), }), Token::Word(word) => { self.next_token(); Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - word.value.clone(), - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder(word.value.clone()).with_span(peeked_token.span), + ), }) } Token::LParen => { @@ -18516,13 +20087,10 @@ impl<'a> Parser<'a> { parser.expect_token(&Token::RParen)?; values })? { - Some(values) => { - let values = values.into_iter().map(|v| v.value).collect(); - Ok(KeyValueOption { - option_name: key.value.clone(), - option_value: KeyValueOptionKind::Multi(values), - }) - } + Some(values) => Ok(KeyValueOption { + option_name: key.value.clone(), + option_value: KeyValueOptionKind::Multi(values), + }), None => Ok(KeyValueOption { option_name: key.value.clone(), option_value: KeyValueOptionKind::KeyValueOptions(Box::new( @@ -18531,20 +20099,20 @@ impl<'a> Parser<'a> { }), } } - _ => self.expected("expected option value", self.peek_token()), + _ => self.expected_ref("expected option value", self.peek_token_ref()), } } /// Parses a RESET statement - fn parse_reset(&mut self) -> Result { + fn parse_reset(&mut self) -> Result { if self.parse_keyword(Keyword::ALL) { - return Ok(Statement::Reset(ResetStatement { reset: Reset::ALL })); + return Ok(ResetStatement { reset: Reset::ALL }); } let obj = self.parse_object_name(false)?; - Ok(Statement::Reset(ResetStatement { + Ok(ResetStatement { reset: Reset::ConfigurationParameter(obj), - })) + }) } } @@ -18560,7 +20128,11 @@ fn maybe_prefixed_expr(expr: Expr, prefix: Option) -> Expr { } impl Word { - #[deprecated(since = "0.54.0", note = "please use `into_ident` instead")] + /// Convert a reference to this word into an [`Ident`] by cloning the value. 
+ /// + /// Use this method when you need to keep the original `Word` around. + /// If you can consume the `Word`, prefer [`into_ident`](Self::into_ident) instead + /// to avoid cloning. pub fn to_ident(&self, span: Span) -> Ident { Ident { value: self.value.clone(), @@ -18569,7 +20141,10 @@ impl Word { } } - /// Convert this word into an [`Ident`] identifier + /// Convert this word into an [`Ident`] identifier, consuming the `Word`. + /// + /// This avoids cloning the string value. If you need to keep the original + /// `Word`, use [`to_ident`](Self::to_ident) instead. pub fn into_ident(self, span: Span) -> Ident { Ident { value: self.value, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a2525e4729..a664a81c41 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,10 +29,10 @@ use alloc::{ vec, vec::Vec, }; -use core::iter::Peekable; use core::num::NonZeroU8; use core::str::Chars; use core::{cmp, fmt}; +use core::{iter::Peekable, str}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -46,7 +46,10 @@ use crate::dialect::{ SnowflakeDialect, }; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; -use crate::{ast::DollarQuotedString, dialect::HiveDialect}; +use crate::{ + ast::{DollarQuotedString, QuoteDelimitedString}, + dialect::HiveDialect, +}; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -98,6 +101,12 @@ pub enum Token { TripleDoubleQuotedRawStringLiteral(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "National" quote delimited literal.
Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -294,6 +303,8 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), + Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f), + Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -393,23 +404,54 @@ impl fmt::Display for Token { } impl Token { + /// Create a `Token::Word` from an unquoted `keyword`. + /// + /// The lookup is case-insensitive; unknown values become `Keyword::NoKeyword`. pub fn make_keyword(keyword: &str) -> Self { Token::make_word(keyword, None) } + /// Create a `Token::Word` from `word` with an optional `quote_style`. + /// + /// When `quote_style` is `None`, the parser attempts a case-insensitive keyword + /// lookup and sets the `Word::keyword` accordingly. 
pub fn make_word(word: &str, quote_style: Option) -> Self { - let word_uppercase = word.to_uppercase(); Token::Word(Word { + keyword: keyword_lookup(word, quote_style), value: word.to_string(), quote_style, - keyword: if quote_style.is_none() { - let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str()); - keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) - } else { - Keyword::NoKeyword - }, }) } + + /// Like [`Self::make_word`] but takes ownership of the word `String`, + /// avoiding an extra allocation when the caller already has an owned value. + fn make_word_owned(word: String, quote_style: Option) -> Self { + Token::Word(Word { + keyword: keyword_lookup(&word, quote_style), + value: word, + quote_style, + }) + } +} + +/// Case-insensitive keyword lookup using binary search over [`ALL_KEYWORDS`]. +fn keyword_lookup(word: &str, quote_style: Option) -> Keyword { + if quote_style.is_some() { + return Keyword::NoKeyword; + } + ALL_KEYWORDS + .binary_search_by(|probe| { + let probe = probe.as_bytes(); + let word = word.as_bytes(); + for (p, w) in probe.iter().zip(word.iter()) { + let cmp = p.cmp(&w.to_ascii_uppercase()); + if cmp != core::cmp::Ordering::Equal { + return cmp; + } + } + probe.len().cmp(&word.len()) + }) + .map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) } /// A keyword (like SELECT) or an optionally quoted SQL identifier @@ -452,14 +494,27 @@ impl Word { } } +/// Represents whitespace in the input: spaces, newlines, tabs and comments. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Whitespace { + /// A single space character. Space, + /// A newline character. Newline, + /// A tab character. Tab, - SingleLineComment { comment: String, prefix: String }, + /// A single-line comment (e.g. `-- comment` or `# comment`). 
+ /// The `comment` field contains the text, and `prefix` contains the comment prefix. + SingleLineComment { + /// The content of the comment (without the prefix). + comment: String, + /// The prefix used for the comment (for example `--` or `#`). + prefix: String, + }, + + /// A multi-line comment (without the `/* ... */` delimiters). MultiLineComment(String), } @@ -561,7 +616,9 @@ impl From<(u64, u64)> for Location { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Span { + /// Start `Location` (inclusive). pub start: Location, + /// End `Location` (inclusive). pub end: Location, } @@ -683,8 +740,11 @@ pub type TokenWithLocation = TokenWithSpan; #[derive(Debug, Clone, Hash, Ord, PartialOrd, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A `Token` together with its `Span` (location in the source). pub struct TokenWithSpan { + /// The token value. pub token: Token, + /// The span covering the token in the input. pub span: Span, } @@ -728,22 +788,29 @@ impl fmt::Display for TokenWithSpan { } } +/// A token together with its byte-offset range in the source string. pub struct TokenWithRange { + /// The token. pub token: Token, + /// The byte offset of the start of the token in the source string. pub start: usize, + /// The byte offset of the end of the token in the source string. pub end: usize, } impl TokenWithRange { + /// Creates a new [`TokenWithRange`] with the given token and byte-offset range. pub fn new(token: Token, start: usize, end: usize) -> Self { Self { token, start, end } } } -/// Tokenizer error +/// An error reported by the tokenizer, with a human-readable `message` and a `location`. #[derive(Debug, PartialEq, Eq)] pub struct TokenizerError { + /// A descriptive error message. pub message: String, + /// The `Location` where the error was detected. 
pub location: Location, } @@ -753,14 +820,13 @@ impl fmt::Display for TokenizerError { } } -#[cfg(feature = "std")] -impl std::error::Error for TokenizerError {} +impl core::error::Error for TokenizerError {} struct State<'a> { peekable: Peekable>, pub pos: usize, - pub line: u64, - pub col: u64, + line: u64, + col: u64, } impl State<'_> { @@ -786,6 +852,7 @@ impl State<'_> { self.peekable.peek() } + /// Return the current `Location` (line and column) pub fn location(&self) -> Location { Location { line: self.line, @@ -895,6 +962,7 @@ impl<'a> Tokenizer<'a> { Ok(twl.into_iter().map(|t| t.token).collect()) } + /// Tokenize the statement and produce a vector of tokens with their byte-offset ranges. pub fn tokenize_with_range(&mut self) -> Result, TokenizerError> { let mut tokens = Vec::::new(); let mut state = State { @@ -928,6 +996,16 @@ impl<'a> Tokenizer<'a> { pub fn tokenize_with_location_into_buf( &mut self, buf: &mut Vec, + ) -> Result<(), TokenizerError> { + self.tokenize_with_location_into_buf_with_mapper(buf, |token| token) + } + + /// Tokenize the statement and produce a vector of tokens, mapping each token + /// with provided `mapper` + pub fn tokenize_with_location_into_buf_with_mapper( + &mut self, + buf: &mut Vec, + mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan, ) -> Result<(), TokenizerError> { let mut state = State { peekable: self.query.chars().peekable(), @@ -940,10 +1018,67 @@ impl<'a> Tokenizer<'a> { while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? 
{ let span = location.span_to(state.location()); - buf.push(TokenWithSpan { token, span }); + // Check if this is a multiline comment hint that should be expanded + match &token { + Token::Whitespace(Whitespace::MultiLineComment(comment)) + if self.dialect.supports_multiline_comment_hints() + && comment.starts_with('!') => + { + // Re-tokenize the hints and add them to the buffer + self.tokenize_comment_hints(comment, span, buf, &mut mapper)?; + } + _ => { + buf.push(mapper(TokenWithSpan { token, span })); + } + } + + location = state.location(); + } + Ok(()) + } + + /// Re-tokenize optimizer hints from a multiline comment and add them to the buffer. + /// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024` + fn tokenize_comment_hints( + &self, + comment: &str, + span: Span, + buf: &mut Vec, + mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan, + ) -> Result<(), TokenizerError> { + // Strip the leading '!' and any version digits (e.g., "50110") + let hint_content = comment + .strip_prefix('!') + .unwrap_or(comment) + .trim_start_matches(|c: char| c.is_ascii_digit()); + + // If there's no content after stripping, nothing to tokenize + if hint_content.is_empty() { + return Ok(()); + } + + // Create a new tokenizer for the hint content + let inner = Tokenizer::new(self.dialect, hint_content).with_unescape(self.unescape); + // Create a state for tracking position within the hint + let mut state = State { + peekable: hint_content.chars().peekable(), + pos: 0, + line: span.start.line, + col: span.start.column, + }; + + // Tokenize the hint content and add tokens to the buffer + let mut location = state.location(); + while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? 
{ + let token_span = location.span_to(state.location()); + buf.push(mapper(TokenWithSpan { + token, + span: token_span, + })); location = state.location(); } + Ok(()) } @@ -971,7 +1106,7 @@ impl<'a> Tokenizer<'a> { return Ok(Some(Token::Number(s, false))); } - Ok(Some(Token::make_word(&word, None))) + Ok(Some(Token::make_word_owned(word, None))) } /// Get the next token or return None @@ -1029,7 +1164,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "b" or "B" let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1056,7 +1191,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "r" or "R" let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1072,13 +1207,35 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } + Some(&q @ 'q') | Some(&q @ 'Q') + if self.dialect.supports_quote_delimited_string() => + { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[n, q]) + .map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s))) + } else { + let s = self.tokenize_word(String::from_iter([n, q]), chars); + Ok(Some(Token::make_word_owned(s, None))) + } + } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } + q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[q]) + .map(|s| Some(Token::QuoteDelimitedStringLiteral(s))) + } else { + let s = self.tokenize_word(q, chars); + Ok(Some(Token::make_word_owned(s, None))) + } + } // 
PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); @@ -1092,7 +1249,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "E" or "e" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1111,7 +1268,7 @@ impl<'a> Tokenizer<'a> { } // regular identifier starting with an "U" or "u" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } // The spec only allows an uppercase 'X' to introduce a hex // string, but PostgreSQL, at least, allows a lowercase 'x' too. @@ -1126,7 +1283,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "X" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1175,7 +1332,7 @@ impl<'a> Tokenizer<'a> { // delimited (quoted) identifier quote_start if self.dialect.is_delimited_identifier_start(ch) => { let word = self.tokenize_quoted_identifier(quote_start, chars)?; - Ok(Some(Token::make_word(&word, Some(quote_start)))) + Ok(Some(Token::make_word_owned(word, Some(quote_start)))) } // Potentially nested delimited (quoted) identifier quote_start @@ -1199,7 +1356,7 @@ impl<'a> Tokenizer<'a> { let Some(nested_quote_start) = nested_quote_start else { let word = self.tokenize_quoted_identifier(quote_start, chars)?; - return Ok(Some(Token::make_word(&word, Some(quote_start)))); + return Ok(Some(Token::make_word_owned(word, Some(quote_start)))); }; let mut word = vec![]; @@ -1227,7 +1384,10 @@ impl<'a> Tokenizer<'a> { } chars.next(); // skip close delimiter - Ok(Some(Token::make_word(&word.concat(), Some(quote_start)))) + Ok(Some(Token::make_word_owned( + word.concat(), + Some(quote_start), + ))) } // numbers and period '0'..='9' | '.' 
=> { @@ -1337,12 +1497,12 @@ impl<'a> Tokenizer<'a> { if !word.is_empty() { s += word.as_str(); - return Ok(Some(Token::make_word(s.as_str(), None))); + return Ok(Some(Token::make_word_owned(s, None))); } } else if prev_token == Some(&Token::Period) { // If the previous token was a period, thus not belonging to a number, // the value we have is part of an identifier. - return Ok(Some(Token::make_word(s.as_str(), None))); + return Ok(Some(Token::make_word_owned(s, None))); } } @@ -1366,7 +1526,11 @@ impl<'a> Tokenizer<'a> { Some('-') => { let mut is_comment = true; if self.dialect.requires_single_line_comment_whitespace() { - is_comment = Some(' ') == chars.peekable.clone().nth(1); + is_comment = chars + .peekable + .clone() + .nth(1) + .is_some_and(char::is_whitespace); } if is_comment { @@ -1510,6 +1674,9 @@ impl<'a> Tokenizer<'a> { chars.next(); match chars.peek() { Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship), + // `<=+` and `<=-` are not valid combined operators; treat `<=` as + // the operator and leave `+`/`-` to be tokenized separately. + Some('+') | Some('-') => Ok(Some(Token::LtEq)), _ => self.start_binop(chars, "<=", Token::LtEq), } } @@ -1529,13 +1696,15 @@ impl<'a> Tokenizer<'a> { } } Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft), + // `<+` is not a valid combined operator; treat `<` as the operator + // and leave `+` to be tokenized separately. 
+ Some('+') => Ok(Some(Token::Lt)), Some('-') if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('>') => { - self.consume_for_binop(chars, "<->", Token::TwoWayArrow) - } - _ => self.start_binop_opt(chars, "<-", None), + if chars.peekable.clone().nth(1) == Some('>') { + chars.next(); // consume `-` + self.consume_for_binop(chars, "<->", Token::TwoWayArrow) + } else { + Ok(Some(Token::Lt)) } } Some('^') if self.dialect.supports_geometric_types() => { @@ -1762,13 +1931,13 @@ impl<'a> Tokenizer<'a> { } } Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp), - _ => self.consume_and_return(chars, Token::Question), + _ => Ok(Some(Token::Question)), } } '?' => { chars.next(); let s = peeking_take_while(chars, |ch| ch.is_numeric()); - Ok(Some(Token::Placeholder(String::from("?") + &s))) + Ok(Some(Token::Placeholder(format!("?{s}")))) } // identifier or keyword @@ -1917,7 +2086,7 @@ impl<'a> Tokenizer<'a> { } } } else { - return Ok(Token::Placeholder(String::from("$") + &value)); + return Ok(Token::Placeholder(format!("${value}"))); } } @@ -2072,6 +2241,61 @@ impl<'a> Tokenizer<'a> { ) } + /// Reads a quote delimited string expecting `chars.next()` to deliver a quote. + /// + /// See + fn tokenize_quote_delimited_string( + &self, + chars: &mut State, + // the prefix that introduced the possible literal or word, + // e.g. 
"Q" or "nq" + literal_prefix: &[char], + ) -> Result { + let literal_start_loc = chars.location(); + chars.next(); + + let start_quote_loc = chars.location(); + let (start_quote, end_quote) = match chars.next() { + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + start_quote_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''", + String::from_iter(literal_prefix) + ), + ); + } + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + + // read the string literal until the "quote character" following a by literal quote + let mut value = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(QuoteDelimitedString { + start_quote, + value, + end_quote, + }); + } + } + value.push(ch); + } + + self.tokenizer_error(literal_start_loc, "Unterminated string literal") + } + /// Read a quoted string. 
fn tokenize_quoted_string( &self, @@ -2186,7 +2410,6 @@ impl<'a> Tokenizer<'a> { let mut s = String::new(); let mut nested = 1; let supports_nested_comments = self.dialect.supports_nested_comments(); - loop { match chars.next() { Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => { @@ -2495,9 +2718,10 @@ fn take_char_from_hex_digits( mod tests { use super::*; use crate::dialect::{ - BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, + BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, + PostgreSqlDialect, SQLiteDialect, }; - use crate::test_utils::{all_dialects_except, all_dialects_where}; + use crate::test_utils::{all_dialects, all_dialects_except, all_dialects_where}; use core::fmt::Debug; #[test] @@ -2506,9 +2730,8 @@ mod tests { message: "test".into(), location: Location { line: 1, column: 1 }, }; - #[cfg(feature = "std")] { - use std::error::Error; + use core::error::Error; assert!(err.source().is_none()); } assert_eq!(err.to_string(), "test at Line: 1, Column: 1"); @@ -2544,6 +2767,38 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_with_mapper() { + let sql = String::from("SELECT ?"); + let dialect = GenericDialect {}; + let mut param_num = 1; + + let mut tokens = vec![]; + Tokenizer::new(&dialect, &sql) + .tokenize_with_location_into_buf_with_mapper(&mut tokens, |mut token_span| { + token_span.token = match token_span.token { + Token::Placeholder(n) => Token::Placeholder(if n == "?" 
{ + let ret = format!("${}", param_num); + param_num += 1; + ret + } else { + n + }), + token => token, + }; + token_span + }) + .unwrap(); + let actual = tokens.into_iter().map(|t| t.token).collect(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$1".to_string()), + ]; + + compare(expected, actual); + } + #[test] fn tokenize_clickhouse_double_equal() { let sql = String::from("SELECT foo=='1'"); @@ -4032,6 +4287,24 @@ mod tests { Token::Minus, ], ); + + all_dialects_where(|d| d.requires_single_line_comment_whitespace()).tokenizes_to( + "--\n-- Table structure for table...\n--\n", + vec![ + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "\n".to_string(), + }), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: " Table structure for table...\n".to_string(), + }), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "\n".to_string(), + }), + ], + ); } #[test] @@ -4139,6 +4412,195 @@ mod tests { } } + #[test] + fn tokenize_question_mark() { + let dialect = PostgreSqlDialect {}; + let sql = "SELECT x ? y"; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + compare( + tokens, + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::make_word("x", None), + Token::Whitespace(Whitespace::Space), + Token::Question, + Token::Whitespace(Whitespace::Space), + Token::make_word("y", None), + ], + ); + } + + #[test] + fn tokenize_multiline_comment_with_comment_hint() { + let sql = String::from("0/*! 
word */1"); + + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "word".to_string(), + quote_style: None, + keyword: Keyword::NoKeyword, + }), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_multiline_comment_with_comment_hint_and_version() { + let sql_multi = String::from("0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1"); + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, &sql_multi).tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "KEY_BLOCK_SIZE".to_string(), + quote_style: None, + keyword: Keyword::KEY_BLOCK_SIZE, + }), + Token::Whitespace(Whitespace::Space), + Token::Eq, + Token::Whitespace(Whitespace::Space), + Token::Number("1024".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ]; + compare(expected, tokens); + + let tokens = Tokenizer::new(&dialect, "0 /*!50110 */ 1") + .tokenize() + .unwrap(); + compare( + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ], + tokens, + ); + + let tokens = Tokenizer::new(&dialect, "0 /*!*/ 1").tokenize().unwrap(); + compare( + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ], + tokens, + ); + let tokens = Tokenizer::new(&dialect, "0 /*! 
*/ 1").tokenize().unwrap(); + compare( + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ], + tokens, + ); + } + + #[test] + fn tokenize_lt() { + all_dialects().tokenizes_to( + "select a <-50", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Minus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects().tokenizes_to( + "select a <+50", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Plus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects().tokenizes_to( + "select a <=-50", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::LtEq, + Token::Minus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects().tokenizes_to( + "select a <=+50", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::LtEq, + Token::Plus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects_where(|d| d.supports_geometric_types()).tokenizes_to( + "select a <->b", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::TwoWayArrow, + Token::make_word("b", None), + ], + ); + + all_dialects().tokenizes_to( + "select a <-b", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + 
Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Minus, + Token::make_word("b", None), + ], + ); + all_dialects().tokenizes_to( + "select a <+b", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Plus, + Token::make_word("b", None), + ], + ); + } + #[test] fn tokenize_mustache_dot_ident() { all_dialects_where(|d| d.is_identifier_start('_')).tokenizes_to( diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index f2b9f2affe..79db34b06e 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -591,6 +591,16 @@ fn parse_create_table_with_options() { bigquery().verified_stmt(sql); } +#[test] +fn parse_create_external_table_with_options() { + bigquery().verified_stmt( + "CREATE EXTERNAL TABLE dataset_id.table1 (hvr_tx_seq STRING) OPTIONS(format = 'CSV')", + ); + bigquery().verified_stmt( + "CREATE EXTERNAL TABLE dataset_id.table1 (hvr_tx_seq STRING) OPTIONS(format = 'CSV', allow_quoted_newlines = true, encoding = 'UTF8')", + ); +} + #[test] fn parse_nested_data_types() { let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; @@ -1739,7 +1749,7 @@ fn parse_table_time_travel() { args: None, with_hints: vec![], version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( - Value::SingleQuotedString(version).with_empty_span() + Value::SingleQuotedString(version.clone()).with_empty_span() ))), partitions: vec![], with_ordinality: false, @@ -1806,15 +1816,16 @@ fn parse_merge() { ); let insert_action = MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity")], + columns: vec![Ident::new("product").into(), Ident::new("quantity").into()], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, explicit_row: false, rows: vec![vec![Expr::value(number("1")), 
Expr::value(number("2"))]], }), + insert_predicate: None, }); - let update_action = MergeAction::Update { + let update_action = MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -1826,17 +1837,19 @@ fn parse_merge() { value: Expr::value(number("2")), }, ], - }; + update_predicate: None, + delete_predicate: None, + }); match bigquery_and_generic().verified_stmt(sql) { - Statement::Merge { + Statement::Merge(Merge { into, table, source, on, clauses, .. - } => { + }) => { assert!(!into); assert_eq!( TableFactor::Table { @@ -1917,9 +1930,13 @@ fn parse_merge() { predicate: Some(Expr::value(number("1"))), action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1928,9 +1945,13 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1941,7 +1962,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1952,7 +1974,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1975,7 +1998,7 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { 
insert_token: AttachedToken::empty(), - columns: vec![Ident::new("a"), Ident::new("b"),], + columns: vec![Ident::new("a").into(), Ident::new("b").into(),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -1984,7 +2007,8 @@ fn parse_merge() { Expr::value(number("1")), Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, MergeClause { @@ -2002,7 +2026,8 @@ fn parse_merge() { Expr::value(number("1")), Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, ], @@ -2264,7 +2289,7 @@ fn test_bigquery_create_function() { Ident::new("myfunction"), ]), args: Some(vec![OperateFunctionArg::with_name("x", DataType::Float64),]), - return_type: Some(DataType::Float64), + return_type: Some(FunctionReturnType::DataType(DataType::Float64)), function_body: Some(CreateFunctionBody::AsAfterOptions(Expr::Value( number("42").with_empty_span() ))), @@ -2279,6 +2304,8 @@ fn test_bigquery_create_function() { remote_connection: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], }) ); @@ -2664,7 +2691,9 @@ fn test_export_data() { }), Span::empty() )), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -2692,7 +2721,7 @@ fn test_export_data() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: Some(OrderBy { @@ -2768,7 +2797,9 @@ fn test_export_data() { }), Span::empty() )), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -2796,7 +2827,7 @@ fn test_export_data() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: Some(OrderBy { @@ -2869,3 +2900,25 @@ fn test_alter_schema() { 
bigquery_and_generic() .verified_stmt("ALTER SCHEMA IF EXISTS mydataset SET OPTIONS (location = 'us')"); } + +#[test] +fn test_create_snapshot_table() { + bigquery_and_generic() + .verified_stmt("CREATE SNAPSHOT TABLE dataset_id.table1 CLONE dataset_id.table2"); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE IF NOT EXISTS dataset_id.table1 CLONE dataset_id.table2", + ); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE dataset_id.table1 CLONE dataset_id.table2 FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)", + ); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE dataset_id.table1 CLONE dataset_id.table2 OPTIONS(expiration_timestamp = TIMESTAMP '2025-01-01 00:00:00 UTC', friendly_name = 'my_table')", + ); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE IF NOT EXISTS dataset_id.table1 CLONE dataset_id.table2 FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR) OPTIONS(expiration_timestamp = TIMESTAMP '2025-01-01 00:00:00 UTC')", + ); +} diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 44bfcda426..82f79577b9 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -40,8 +40,10 @@ fn parse_map_access_expr() { let select = clickhouse().verified_only_select(sql); assert_eq!( Select { - distinct: None, select_token: AttachedToken::empty(), + optimizer_hints: vec![], + distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::CompoundFieldAccess { @@ -101,7 +103,7 @@ fn parse_map_access_expr() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }, select diff --git a/tests/sqlparser_comments.rs b/tests/sqlparser_comments.rs new file mode 100644 index 0000000000..34442ca3e0 --- /dev/null +++ b/tests/sqlparser_comments.rs @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under 
one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![warn(clippy::all)] +//! Test comment extraction from SQL source code. + +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::comments::{Comment, CommentWithSpan}, + dialect::GenericDialect, + parser::Parser, + tokenizer::Span, +}; + +#[test] +fn parse_sql_with_comments() { + let sql = r#" +-- second line comment +select * from /* inline comment after `from` */ dual; + +/*select +some +more*/ + + -- end-of-script-with-no-newline"#; + + let comments = match Parser::parse_sql_with_comments(&GenericDialect, sql) { + Ok((_, comments)) => comments, + Err(e) => panic!("Invalid sql script: {e}"), + }; + + assert_eq!( + Vec::from(comments), + vec![ + CommentWithSpan { + comment: Comment::SingleLine { + content: " second line comment\n".into(), + prefix: "--".into() + }, + span: Span::new((2, 1).into(), (3, 1).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine(" inline comment after `from` ".into()), + span: Span::new((3, 15).into(), (3, 48).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine("select\nsome\nmore".into()), + span: Span::new((5, 1).into(), (7, 7).into()) + }, + CommentWithSpan { + comment: Comment::SingleLine { + content: " end-of-script-with-no-newline".into(), + 
prefix: "--".into() + }, + span: Span::new((9, 3).into(), (9, 35).into()), + } + ] + ); +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6104fe76e8..9055f93aa0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -72,9 +72,7 @@ fn parse_numeric_literal_underscore() { assert_eq!( select.projection, - vec![UnnamedExpr(Expr::Value( - (number("10_000")).with_empty_span() - ))] + vec![UnnamedExpr(Expr::Value(number("10_000").with_empty_span()))] ); } @@ -155,7 +153,10 @@ fn parse_insert_values() { assert_eq!(table_name.to_string(), expected_table_name); assert_eq!(columns.len(), expected_columns.len()); for (index, column) in columns.iter().enumerate() { - assert_eq!(column, &Ident::new(expected_columns[index].clone())); + assert_eq!( + column, + &ObjectName::from(Ident::new(expected_columns[index].clone())) + ); } match *source.body { SetExpr::Values(Values { @@ -457,6 +458,7 @@ fn parse_update_set_from() { stmt, Statement::Update(Update { update_token: AttachedToken::empty(), + optimizer_hints: vec![], table: TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("t1")])), joins: vec![], @@ -472,7 +474,9 @@ fn parse_update_set_from() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -500,7 +504,7 @@ fn parse_update_set_from() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -512,7 +516,8 @@ fn parse_update_set_from() { format_clause: None, pipe_operators: vec![], }), - alias: table_alias(true, "t2") + alias: table_alias(true, "t2"), + sample: None, }, joins: vec![] }])), @@ -528,6 +533,7 @@ fn parse_update_set_from() { ])), }), returning: None, + output: None, or: None, limit: None }) @@ -549,8 +555,10 @@ 
fn parse_update_with_table_alias() { returning, or: None, limit: None, + optimizer_hints, update_token: _, - }) => { + output: _, + }) if optimizer_hints.is_empty() => { assert_eq!( TableWithJoins { relation: TableFactor::Table { @@ -1040,18 +1048,18 @@ fn parse_outer_join_operator() { #[test] fn parse_select_distinct_on() { let sql = "SELECT DISTINCT ON (album_id) name FROM track ORDER BY album_id, milliseconds"; - let select = verified_only_select(sql); + let select = all_dialects_except(|d| d.is::()).verified_only_select(sql); assert_eq!( &Some(Distinct::On(vec![Expr::Identifier(Ident::new("album_id"))])), &select.distinct ); let sql = "SELECT DISTINCT ON () name FROM track ORDER BY milliseconds"; - let select = verified_only_select(sql); + let select = all_dialects_except(|d| d.is::()).verified_only_select(sql); assert_eq!(&Some(Distinct::On(vec![])), &select.distinct); let sql = "SELECT DISTINCT ON (album_id, milliseconds) name FROM track"; - let select = verified_only_select(sql); + let select = all_dialects_except(|d| d.is::()).verified_only_select(sql); assert_eq!( &Some(Distinct::On(vec![ Expr::Identifier(Ident::new("album_id")), @@ -1072,14 +1080,24 @@ fn parse_select_distinct_missing_paren() { #[test] fn parse_select_all() { - one_statement_parses_to("SELECT ALL name FROM customer", "SELECT name FROM customer"); + verified_stmt("SELECT ALL name FROM customer"); } #[test] fn parse_select_all_distinct() { let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer"); assert_eq!( - ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()), + ParserError::ParserError("Cannot specify ALL then DISTINCT".to_string()), + result.unwrap_err(), + ); + let result = parse_sql_statements("SELECT DISTINCT ALL name FROM customer"); + assert_eq!( + ParserError::ParserError("Cannot specify DISTINCT then ALL".to_string()), + result.unwrap_err(), + ); + let result = parse_sql_statements("SELECT ALL DISTINCT ON(name) name FROM customer"); + 
assert_eq!( + ParserError::ParserError("Cannot specify ALL then DISTINCT".to_string()), result.unwrap_err(), ); } @@ -1267,6 +1285,26 @@ fn parse_select_expr_star() { dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T"); } +#[test] +fn parse_select_wildcard_with_alias() { + let dialects = all_dialects_where(|d| d.supports_select_wildcard_with_alias()); + + // qualified wildcard with alias + dialects + .parse_sql_statements("SELECT t.* AS all_cols FROM t") + .unwrap(); + + // unqualified wildcard with alias + dialects + .parse_sql_statements("SELECT * AS all_cols FROM t") + .unwrap(); + + // mixed: regular column + qualified wildcard with alias + dialects + .parse_sql_statements("SELECT a.id, b.* AS b_cols FROM a JOIN b ON (a.id = b.a_id)") + .unwrap(); +} + #[test] fn test_eof_after_as() { let res = parse_sql_statements("SELECT foo AS"); @@ -1650,6 +1688,7 @@ fn parse_json_ops_without_colon() { Arrow, all_dialects_except(|d| d.supports_lambda_functions()), ), + ("->", Arrow, pg_and_generic()), ("->>", LongArrow, all_dialects()), ("#>", HashArrow, pg_and_generic()), ("#>>", HashLongArrow, pg_and_generic()), @@ -2065,7 +2104,7 @@ fn parse_ilike() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), any: false, }, select.selection.unwrap() @@ -2129,7 +2168,7 @@ fn parse_like() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), any: false, }, select.selection.unwrap() @@ -2192,7 +2231,7 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: 
Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), }, select.selection.unwrap() ); @@ -2209,7 +2248,7 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::Null), + escape_char: Some(Value::Null.with_empty_span()), }, select.selection.unwrap() ); @@ -2227,7 +2266,7 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), })), select.selection.unwrap() ); @@ -2370,6 +2409,29 @@ fn parse_bitwise_ops() { } } +#[test] +fn parse_bitwise_shift_ops() { + let dialects = all_dialects_where(|d| d.supports_bitwise_shift_operators()); + let sql = "SELECT 1 << 2, 3 >> 4"; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Value((number("1")).with_empty_span())), + op: BinaryOperator::PGBitwiseShiftLeft, + right: Box::new(Expr::Value((number("2")).with_empty_span())), + }), + select.projection[0] + ); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Value((number("3")).with_empty_span())), + op: BinaryOperator::PGBitwiseShiftRight, + right: Box::new(Expr::Value((number("4")).with_empty_span())), + }), + select.projection[1] + ); +} + #[test] fn parse_binary_any() { let select = verified_only_select("SELECT a = ANY(b)"); @@ -3005,6 +3067,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3017,6 +3080,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: 
DataType::TinyInt(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3048,6 +3112,7 @@ fn parse_cast() { length: 50, unit: None, })), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3060,6 +3125,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3072,6 +3138,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3084,6 +3151,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3096,6 +3164,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(BinaryLength::IntegerLength { length: 50 })), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3108,6 +3177,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3120,6 +3190,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3132,6 +3203,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("details"))), data_type: DataType::JSONB, + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3147,6 +3219,7 @@ fn parse_try_cast() { kind: CastKind::TryCast, expr: 
Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3252,7 +3325,9 @@ fn parse_ceil_scale() { assert_eq!( &Expr::Ceil { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(bigdecimal::BigDecimal::from(2), false)), + field: CeilFloorKind::Scale( + Value::Number(bigdecimal::BigDecimal::from(2), false).with_empty_span() + ), }, expr_from_projection(only(&select.projection)), ); @@ -3261,7 +3336,7 @@ fn parse_ceil_scale() { assert_eq!( &Expr::Ceil { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(2.to_string(), false)), + field: CeilFloorKind::Scale(Value::Number(2.to_string(), false).with_empty_span()), }, expr_from_projection(only(&select.projection)), ); @@ -3276,7 +3351,9 @@ fn parse_floor_scale() { assert_eq!( &Expr::Floor { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(bigdecimal::BigDecimal::from(2), false)), + field: CeilFloorKind::Scale( + Value::Number(bigdecimal::BigDecimal::from(2), false).with_empty_span() + ), }, expr_from_projection(only(&select.projection)), ); @@ -3285,7 +3362,7 @@ fn parse_floor_scale() { assert_eq!( &Expr::Floor { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(2.to_string(), false)), + field: CeilFloorKind::Scale(Value::Number(2.to_string(), false).with_empty_span()), }, expr_from_projection(only(&select.projection)), ); @@ -5420,6 +5497,42 @@ fn parse_explain_analyze_with_simple_select() { Some(AnalyzeFormatKind::Keyword(AnalyzeFormat::TEXT)), None, ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=TEXT SELECT sqrt(id) FROM foo", + false, + false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::TEXT)), + None, + ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=GRAPHVIZ SELECT sqrt(id) FROM foo", + false, + 
false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::GRAPHVIZ)), + None, + ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=JSON SELECT sqrt(id) FROM foo", + false, + false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::JSON)), + None, + ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=TREE SELECT sqrt(id) FROM foo", + false, + false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::TREE)), + None, + ); } #[test] @@ -5736,7 +5849,9 @@ fn test_parse_named_window() { let actual_select_only = dialects.verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -5875,7 +5990,7 @@ fn test_parse_named_window() { qualify: None, window_before_qualify: true, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }; assert_eq!(actual_select_only, expected); @@ -6447,6 +6562,7 @@ fn interval_disallow_interval_expr_double_colon() { fractional_seconds_precision: None, })), data_type: DataType::Text, + array: false, format: None, } ) @@ -6465,7 +6581,9 @@ fn parse_interval_and_or_xor() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::Identifier(Ident { @@ -6547,7 +6665,7 @@ fn parse_interval_and_or_xor() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -7793,6 +7911,7 @@ fn parse_derived_tables() { lateral: false, subquery: Box::new(verified_query("(SELECT 1) UNION (SELECT 2)")), alias: table_alias(true, "t1"), + sample: None, }, joins: vec![Join { relation: table_from_name(ObjectName::from(vec!["t2".into()])), @@ -7990,23 
+8109,46 @@ fn parse_trim() { parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); - //keep Snowflake/BigQuery TRIM syntax failing - let all_expected_snowflake = TestedDialects::new(vec![ - //Box::new(GenericDialect {}), - Box::new(PostgreSqlDialect {}), - Box::new(MsSqlDialect {}), - Box::new(AnsiDialect {}), - //Box::new(SnowflakeDialect {}), - Box::new(HiveDialect {}), - Box::new(RedshiftSqlDialect {}), - Box::new(MySqlDialect {}), - //Box::new(BigQueryDialect {}), - Box::new(SQLiteDialect {}), - ]); + // dialects that support comma-separated TRIM syntax + let dialects = all_dialects_where(|d| d.supports_comma_separated_trim()); + + let sql = "SELECT TRIM(' xyz ', ' ')"; + let select = dialects.verified_only_select(sql); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value( + Value::SingleQuotedString(" xyz ".to_owned()).with_empty_span() + )), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value( + Value::SingleQuotedString(" ".to_owned()).with_empty_span() + )]), + }, + expr_from_projection(only(&select.projection)) + ); + + let sql = "SELECT TRIM('xyz', 'a')"; + let select = dialects.verified_only_select(sql); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value( + Value::SingleQuotedString("xyz".to_owned()).with_empty_span() + )), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value( + Value::SingleQuotedString("a".to_owned()).with_empty_span() + )]), + }, + expr_from_projection(only(&select.projection)) + ); + // dialects without comma-style TRIM syntax should fail + let unsupported_dialects = all_dialects_where(|d| !d.supports_comma_separated_trim()); assert_eq!( - ParserError::ParserError("Expected: ), found: 'a'".to_owned()), - all_expected_snowflake + ParserError::ParserError("Expected: ), found: ,".to_owned()), + unsupported_dialects .parse_sql_statements("SELECT TRIM('xyz', 'a')") .unwrap_err() ); @@ -8180,6 +8322,7 @@ fn parse_create_view() { params, 
name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); @@ -8298,6 +8441,7 @@ fn parse_create_view_temporary() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); @@ -8339,6 +8483,7 @@ fn parse_create_or_replace_view() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("v", name.to_string()); @@ -8384,6 +8529,7 @@ fn parse_create_or_replace_materialized_view() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("v", name.to_string()); @@ -8425,6 +8571,7 @@ fn parse_create_materialized_view() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); @@ -8466,6 +8613,7 @@ fn parse_create_materialized_view_with_cluster_by() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); @@ -8801,6 +8949,7 @@ fn lateral_derived() { lateral, ref subquery, alias: Some(ref alias), + sample: _, } = join.relation { assert_eq!(lateral_in, lateral); @@ -8839,7 +8988,9 @@ fn lateral_function() { let actual_select_only = verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], exclude: None, @@ -8881,7 +9032,7 @@ fn lateral_function() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }; assert_eq!(actual_select_only, expected); @@ -9162,6 +9313,7 @@ fn parse_double_colon_cast_at_timezone() { .with_empty_span() )), 
data_type: DataType::Timestamp(None, TimezoneInfo::None), + array: false, format: None }), time_zone: Box::new(Expr::Value( @@ -9527,14 +9679,14 @@ fn parse_drop_role() { fn parse_grant() { let sql = "GRANT SELECT, INSERT, UPDATE (shape, size), USAGE, DELETE, TRUNCATE, REFERENCES, TRIGGER, CONNECT, CREATE, EXECUTE, TEMPORARY, DROP ON abc, def TO xyz, m WITH GRANT OPTION GRANTED BY jj"; match verified_stmt(sql) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, grantees, with_grant_option, granted_by, .. - } => match (privileges, objects) { + }) => match (privileges, objects) { (Privileges::Actions(actions), Some(GrantObjects::Tables(objects))) => { assert_eq!( vec![ @@ -9579,13 +9731,13 @@ fn parse_grant() { let sql2 = "GRANT INSERT ON ALL TABLES IN SCHEMA public TO browser"; match verified_stmt(sql2) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, grantees, with_grant_option, .. - } => match (privileges, objects) { + }) => match (privileges, objects) { (Privileges::Actions(actions), Some(GrantObjects::AllTablesInSchema { schemas })) => { assert_eq!(vec![Action::Insert { columns: None }], actions); assert_eq_vec(&["public"], &schemas); @@ -9599,13 +9751,13 @@ fn parse_grant() { let sql3 = "GRANT USAGE, SELECT ON SEQUENCE p TO u"; match verified_stmt(sql3) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, grantees, granted_by, .. - } => match (privileges, objects, granted_by) { + }) => match (privileges, objects, granted_by) { (Privileges::Actions(actions), Some(GrantObjects::Sequences(objects)), None) => { assert_eq!( vec![Action::Usage, Action::Select { columns: None }], @@ -9621,7 +9773,7 @@ fn parse_grant() { let sql4 = "GRANT ALL PRIVILEGES ON aa, b TO z"; match verified_stmt(sql4) { - Statement::Grant { privileges, .. } => { + Statement::Grant(Grant { privileges, .. 
}) => { assert_eq!( Privileges::All { with_privileges_keyword: true @@ -9634,11 +9786,11 @@ fn parse_grant() { let sql5 = "GRANT ALL ON SCHEMA aa, b TO z"; match verified_stmt(sql5) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, .. - } => match (privileges, objects) { + }) => match (privileges, objects) { ( Privileges::All { with_privileges_keyword, @@ -9655,11 +9807,11 @@ fn parse_grant() { let sql6 = "GRANT USAGE ON ALL SEQUENCES IN SCHEMA bus TO a, beta WITH GRANT OPTION"; match verified_stmt(sql6) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, .. - } => match (privileges, objects) { + }) => match (privileges, objects) { ( Privileges::Actions(actions), Some(GrantObjects::AllSequencesInSchema { schemas }), @@ -9740,13 +9892,13 @@ fn parse_deny() { fn test_revoke() { let sql = "REVOKE ALL PRIVILEGES ON users, auth FROM analyst"; match verified_stmt(sql) { - Statement::Revoke { + Statement::Revoke(Revoke { privileges, objects: Some(GrantObjects::Tables(tables)), grantees, granted_by, cascade, - } => { + }) => { assert_eq!( Privileges::All { with_privileges_keyword: true @@ -9766,13 +9918,13 @@ fn test_revoke() { fn test_revoke_with_cascade() { let sql = "REVOKE ALL PRIVILEGES ON users, auth FROM analyst CASCADE"; match all_dialects_except(|d| d.is::()).verified_stmt(sql) { - Statement::Revoke { + Statement::Revoke(Revoke { privileges, objects: Some(GrantObjects::Tables(tables)), grantees, granted_by, cascade, - } => { + }) => { assert_eq!( Privileges::All { with_privileges_keyword: true @@ -9794,22 +9946,22 @@ fn parse_merge() { let sql_no_into = "MERGE s.bar AS dest USING (SELECT * FROM s.foo) AS stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; match (verified_stmt(sql), verified_stmt(sql_no_into)) { ( - Statement::Merge { + Statement::Merge(Merge { into, 
table, source, on, clauses, .. - }, - Statement::Merge { + }), + Statement::Merge(Merge { into: no_into, table: table_no_into, source: source_no_into, on: on_no_into, clauses: clauses_no_into, .. - }, + }), ) => { assert!(into); assert!(!no_into); @@ -9839,7 +9991,9 @@ fn parse_merge() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::Wildcard( @@ -9866,7 +10020,7 @@ fn parse_merge() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -9879,6 +10033,7 @@ fn parse_merge() { pipe_operators: vec![], }), alias: table_alias(true, "stg"), + sample: None, } ); assert_eq!(source, source_no_into); @@ -9922,7 +10077,11 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("A"), Ident::new("B"), Ident::new("C")], + columns: vec![ + Ident::new("A").into(), + Ident::new("B").into(), + Ident::new("C").into() + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -9942,6 +10101,7 @@ fn parse_merge() { ]), ]] }), + insert_predicate: None, }), }, MergeClause { @@ -9957,7 +10117,7 @@ fn parse_merge() { (Value::SingleQuotedString("a".to_string())).with_empty_span() )), }), - action: MergeAction::Update { + action: MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -9981,7 +10141,9 @@ fn parse_merge() { ]), }, ], - }, + update_predicate: None, + delete_predicate: None, + }), }, MergeClause { when_token: AttachedToken::empty(), @@ -10000,6 +10162,45 @@ fn parse_merge() { let sql = "MERGE INTO s.bar AS dest USING newArrivals AS S ON (1 > 1) WHEN NOT MATCHED THEN INSERT VALUES (stg.A, 
stg.B, stg.C)"; verified_stmt(sql); + + // MERGE with predicates + let sql = "\ +MERGE INTO FOO \ +USING FOO_IMPORT \ +ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN MATCHED THEN \ +UPDATE SET FOO.NAME = FOO_IMPORT.NAME \ +WHERE 1 = 1 \ +DELETE WHERE FOO.NAME LIKE '%.DELETE' \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (FOO_IMPORT.ID, UPPER(FOO_IMPORT.NAME)) \ +WHERE NOT FOO_IMPORT.NAME LIKE '%.DO_NOT_INSERT'"; + all_dialects().verified_stmt(sql); + + // MERGE with simple insert columns + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); + + // MERGE with qualified insert columns + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (FOO.ID, FOO.NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); + + // MERGE with schema qualified insert columns + let sql = "\ +MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); } #[test] @@ -10881,6 +11082,13 @@ fn parse_deeply_nested_parens_hits_recursion_limits() { assert_eq!(ParserError::RecursionLimitExceeded, res.unwrap_err()); } +#[test] +fn parse_update_deeply_nested_parens_hits_recursion_limits() { + let sql = format!("\nUPDATE\n\n\n\n\n\n\n\n\n\n{}", "(".repeat(1000)); + let res = parse_sql_statements(&sql); + assert_eq!(ParserError::RecursionLimitExceeded, res.unwrap_err()); +} + #[test] fn parse_deeply_nested_unary_op_hits_recursion_limits() { let sql = format!("SELECT {}", "+".repeat(1000)); @@ -11211,6 +11419,18 @@ fn parse_pivot_table() { verified_stmt(multiple_value_columns_sql).to_string(), multiple_value_columns_sql ); + + // assert optional "AS" keyword for aliases for pivot values + one_statement_parses_to( + "SELECT * FROM t PIVOT(SUM(1) FOR a.abc IN 
(1 x, 'two' y, three z))", + "SELECT * FROM t PIVOT(SUM(1) FOR a.abc IN (1 AS x, 'two' AS y, three AS z))", + ); + + // assert optional "AS" keyword for aliases for pivot aggregate function + one_statement_parses_to( + "SELECT * FROM t PIVOT(SUM(1) x, COUNT(42) y FOR a.abc IN (1))", + "SELECT * FROM t PIVOT(SUM(1) AS x, COUNT(42) AS y FOR a.abc IN (1))", + ); } #[test] @@ -12077,6 +12297,8 @@ fn parse_execute_stored_procedure() { } _ => unreachable!(), } + // Test optional parentheses around procedure name + ms_and_generic().one_statement_parses_to("EXEC ('name')", "EXECUTE 'name'"); } #[test] @@ -12195,7 +12417,9 @@ fn parse_unload() { query: Some(Box::new(Query { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::Identifier(Ident::new("cola"))),], @@ -12217,7 +12441,7 @@ fn parse_unload() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), with: None, @@ -12501,52 +12725,7 @@ fn parse_map_access_expr() { #[test] fn parse_connect_by() { - let expect_query = Select { - select_token: AttachedToken::empty(), - distinct: None, - top: None, - top_before_distinct: false, - projection: vec![ - SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("employee_id"))), - SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), - SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), - ], - exclude: None, - from: vec![TableWithJoins { - relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), - joins: vec![], - }], - into: None, - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - 
window_before_qualify: false, - value_table_mode: None, - connect_by: Some(ConnectBy { - condition: Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("title"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Value( - Value::SingleQuotedString("president".to_owned()).with_empty_span(), - )), - }, - relationships: vec![Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("manager_id"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( - "employee_id", - ))))), - }], - }), - flavor: SelectFlavor::Standard, - }; + let dialects = all_dialects_where(|d| d.supports_connect_by()); let connect_by_1 = concat!( "SELECT employee_id, manager_id, title FROM employees ", @@ -12556,8 +12735,63 @@ fn parse_connect_by() { ); assert_eq!( - all_dialects_where(|d| d.supports_connect_by()).verified_only_select(connect_by_1), - expect_query + dialects.verified_only_select(connect_by_1), + Select { + select_token: AttachedToken::empty(), + optimizer_hints: vec![], + distinct: None, + select_modifiers: None, + top: None, + top_before_distinct: false, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("employee_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), + ], + exclude: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), + joins: vec![], + }], + into: None, + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: vec![ + ConnectByKind::StartWith { + start_token: AttachedToken::empty(), + condition: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("title"))), + op: 
BinaryOperator::Eq, + right: Box::new(Expr::Value( + Value::SingleQuotedString("president".to_owned()).with_empty_span(), + )), + } + .into() + }, + ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: false, + relationships: vec![Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("manager_id"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( + "employee_id", + ))))), + }], + } + ], + flavor: SelectFlavor::Standard, + } ); // CONNECT BY can come before START WITH @@ -12568,9 +12802,63 @@ fn parse_connect_by() { "ORDER BY employee_id" ); assert_eq!( - all_dialects_where(|d| d.supports_connect_by()) - .verified_only_select_with_canonical(connect_by_2, connect_by_1), - expect_query + dialects.verified_only_select(connect_by_2), + Select { + select_token: AttachedToken::empty(), + optimizer_hints: vec![], + distinct: None, + select_modifiers: None, + top: None, + top_before_distinct: false, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("employee_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), + ], + exclude: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), + joins: vec![], + }], + into: None, + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: vec![ + ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: false, + relationships: vec![Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("manager_id"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( + "employee_id", + 
))))), + }], + }, + ConnectByKind::StartWith { + start_token: AttachedToken::empty(), + condition: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("title"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value( + Value::SingleQuotedString("president".to_owned()).with_empty_span(), + )), + } + .into() + }, + ], + flavor: SelectFlavor::Standard, + } ); // WHERE must come before CONNECT BY @@ -12582,10 +12870,12 @@ fn parse_connect_by() { "ORDER BY employee_id" ); assert_eq!( - all_dialects_where(|d| d.supports_connect_by()).verified_only_select(connect_by_3), + dialects.verified_only_select(connect_by_3), Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -12615,22 +12905,30 @@ fn parse_connect_by() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: Some(ConnectBy { - condition: Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("title"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Value( - (Value::SingleQuotedString("president".to_owned(),)).with_empty_span() - )), + connect_by: vec![ + ConnectByKind::StartWith { + start_token: AttachedToken::empty(), + condition: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("title"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value( + (Value::SingleQuotedString("president".to_owned(),)).with_empty_span() + )), + } + .into() }, - relationships: vec![Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("manager_id"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( - "employee_id", - ))))), - }], - }), + ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: false, + relationships: vec![Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("manager_id"))), + op: BinaryOperator::Eq, + right: 
Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( + "employee_id", + ))))), + }], + } + ], flavor: SelectFlavor::Standard, } ); @@ -12642,7 +12940,7 @@ fn parse_connect_by() { "WHERE employee_id <> 42 ", "ORDER BY employee_id" ); - all_dialects_where(|d| d.supports_connect_by()) + dialects .parse_sql_statements(connect_by_4) .expect_err("should have failed"); @@ -12656,13 +12954,68 @@ fn parse_connect_by() { "prior" )))] ); -} -#[test] -fn test_selective_aggregation() { - let testing_dialects = all_dialects_where(|d| d.supports_filter_during_aggregation()); - let expected_dialects: Vec> = vec![ - Box::new(PostgreSqlDialect {}), + // no START WITH and NOCYCLE + let connect_by_5 = "SELECT child, parent FROM t CONNECT BY NOCYCLE parent = PRIOR child"; + assert_eq!( + dialects.verified_only_select(connect_by_5), + Select { + select_token: AttachedToken::empty(), + optimizer_hints: vec![], + distinct: None, + select_modifiers: None, + top: None, + top_before_distinct: false, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("child"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("parent"))), + ], + exclude: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident::new("t")])), + joins: vec![], + }], + into: None, + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: vec![ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: true, + relationships: vec![Expr::BinaryOp { + left: Expr::Identifier(Ident::new("parent")).into(), + op: BinaryOperator::Eq, + right: Expr::Prior(Expr::Identifier(Ident::new("child")).into()).into(), + }], + }], + flavor: SelectFlavor::Standard, + } + ); + + // CONNECT BY after WHERE and before 
GROUP BY + dialects.verified_only_select("SELECT 0 FROM t WHERE 1 = 1 CONNECT BY 2 = 2 GROUP BY 3"); + dialects.verified_only_select( + "SELECT 0 FROM t WHERE 1 = 1 START WITH 'a' = 'a' CONNECT BY 2 = 2 GROUP BY 3", + ); + dialects.verified_only_select( + "SELECT 0 FROM t WHERE 1 = 1 CONNECT BY 2 = 2 START WITH 'a' = 'a' GROUP BY 3", + ); +} + +#[test] +fn test_selective_aggregation() { + let testing_dialects = all_dialects_where(|d| d.supports_filter_during_aggregation()); + let expected_dialects: Vec> = vec![ + Box::new(PostgreSqlDialect {}), Box::new(DatabricksDialect {}), Box::new(HiveDialect {}), Box::new(SQLiteDialect {}), @@ -13179,6 +13532,40 @@ fn insert_into_with_parentheses() { dialects.verified_stmt(r#"INSERT INTO t1 ("select", name) (SELECT t2.name FROM t2)"#); } +#[test] +fn test_insert_with_query_table() { + let dialects = all_dialects_where(|d| d.supports_insert_table_query()); + + // a simple query (block); i.e. SELECT ... + let sql = "INSERT INTO (SELECT employee_id, last_name FROM employees) VALUES (207, 'Gregory')"; + dialects.verified_stmt(sql); + + // a full blown query; i.e. `WITH ... SELECT .. 
ORDER BY ...` + let sql = "INSERT INTO \ + (WITH cte AS (SELECT 1 AS id, 2 AS val FROM dual) SELECT foo_t.id, foo_t.val FROM foo_t \ + WHERE EXISTS (SELECT 1 FROM cte WHERE cte.id = foo_t.id) ORDER BY 1, 2) \ + (id, val) \ + VALUES (1000, 10101)"; + dialects.verified_stmt(sql); + + // an alias to the insert target query table + let sql = "INSERT INTO \ + (WITH cte AS (SELECT 1 AS id, 2 AS val FROM dual) SELECT foo_t.id, foo_t.val FROM foo_t \ + WHERE EXISTS (SELECT 1 FROM cte WHERE cte.id = foo_t.id)) abc \ + (id, val) \ + VALUES (1000, 10101)"; + dialects.verified_stmt(sql); + + // a query table target and a query source + let sql = "INSERT INTO (SELECT foo_t.id, foo_t.val FROM foo_t) SELECT 10, 20 FROM dual"; + dialects.verified_stmt(sql); + + // a query table target and a query source, with explicit columns + let sql = + "INSERT INTO (SELECT foo_t.id, foo_t.val FROM foo_t) (id, val) SELECT 10, 20 FROM dual"; + dialects.verified_stmt(sql); +} + #[test] fn parse_odbc_scalar_function() { let select = verified_only_select("SELECT {fn my_func(1, 2)}"); @@ -13248,6 +13635,7 @@ fn test_dictionary_syntax() { (Value::SingleQuotedString("2023-04-01".to_owned())).with_empty_span(), )), data_type: DataType::Timestamp(None, TimezoneInfo::None), + array: false, format: None, }), }, @@ -13259,6 +13647,7 @@ fn test_dictionary_syntax() { (Value::SingleQuotedString("2023-04-05".to_owned())).with_empty_span(), )), data_type: DataType::Timestamp(None, TimezoneInfo::None), + array: false, format: None, }), }, @@ -13303,6 +13692,10 @@ fn test_map_syntax() { Expr::value(number(s)) } + fn null_expr() -> Expr { + Expr::Value((Value::Null).with_empty_span()) + } + check( "MAP {1: 10.0, 2: 20.0}", Expr::Map(Map { @@ -13367,6 +13760,55 @@ fn test_map_syntax() { ); check("MAP {}", Expr::Map(Map { entries: vec![] })); + + check( + "MAP {'a': 1, 'b': NULL}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value( + 
(Value::SingleQuotedString("a".to_owned())).with_empty_span(), + )), + value: Box::new(number_expr("1")), + }, + MapEntry { + key: Box::new(Expr::Value( + (Value::SingleQuotedString("b".to_owned())).with_empty_span(), + )), + value: Box::new(null_expr()), + }, + ], + }), + ); + + check( + "MAP {1: [1, NULL, 3], 2: [4, NULL, 6], 3: [7, 8, 9]}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(number_expr("1")), + value: Box::new(Expr::Array(Array { + elem: vec![number_expr("1"), null_expr(), number_expr("3")], + named: false, + })), + }, + MapEntry { + key: Box::new(number_expr("2")), + value: Box::new(Expr::Array(Array { + elem: vec![number_expr("4"), null_expr(), number_expr("6")], + named: false, + })), + }, + MapEntry { + key: Box::new(number_expr("3")), + value: Box::new(Expr::Array(Array { + elem: vec![number_expr("7"), number_expr("8"), number_expr("9")], + named: false, + })), + }, + ], + }), + ); } #[test] @@ -13502,6 +13944,7 @@ fn test_extract_seconds_ok() { fields: None, precision: None }, + array: false, format: None, }), } @@ -13515,7 +13958,9 @@ fn test_extract_seconds_ok() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::Extract { @@ -13530,6 +13975,7 @@ fn test_extract_seconds_ok() { fields: None, precision: None, }, + array: false, format: None, }), })], @@ -13548,7 +13994,7 @@ fn test_extract_seconds_ok() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -13587,6 +14033,7 @@ fn test_extract_seconds_single_quote_ok() { fields: None, precision: None }, + array: false, format: None, }), } @@ -13771,14 +14218,14 @@ fn test_create_policy() { WITH CHECK (1 = 1)"; match all_dialects().verified_stmt(sql) { - Statement::CreatePolicy 
{ + Statement::CreatePolicy(CreatePolicy { name, table_name, to, using, with_check, .. - } => { + }) => { assert_eq!(name.to_string(), "my_policy"); assert_eq!(table_name.to_string(), "my_table"); assert_eq!( @@ -13879,12 +14326,12 @@ fn test_create_policy() { fn test_drop_policy() { let sql = "DROP POLICY IF EXISTS my_policy ON my_table RESTRICT"; match all_dialects().verified_stmt(sql) { - Statement::DropPolicy { + Statement::DropPolicy(DropPolicy { if_exists, name, table_name, drop_behavior, - } => { + }) => { assert_eq!(if_exists, true); assert_eq!(name.to_string(), "my_policy"); assert_eq!(table_name.to_string(), "my_table"); @@ -13919,12 +14366,12 @@ fn test_drop_policy() { #[test] fn test_alter_policy() { match verified_stmt("ALTER POLICY old_policy ON my_table RENAME TO new_policy") { - Statement::AlterPolicy { + Statement::AlterPolicy(AlterPolicy { name, table_name, operation, .. - } => { + }) => { assert_eq!(name.to_string(), "old_policy"); assert_eq!(table_name.to_string(), "my_table"); assert_eq!( @@ -13941,9 +14388,9 @@ fn test_alter_policy() { "ALTER POLICY my_policy ON my_table TO CURRENT_USER ", "USING ((SELECT c0)) WITH CHECK (c0 > 0)" )) { - Statement::AlterPolicy { + Statement::AlterPolicy(AlterPolicy { name, table_name, .. 
- } => { + }) => { assert_eq!(name.to_string(), "my_policy"); assert_eq!(table_name.to_string(), "my_table"); } @@ -14695,9 +15142,9 @@ fn test_load_extension() { #[test] fn test_select_top() { let dialects = all_dialects_where(|d| d.supports_top_before_distinct()); - dialects.one_statement_parses_to("SELECT ALL * FROM tbl", "SELECT * FROM tbl"); + dialects.verified_stmt("SELECT ALL * FROM tbl"); dialects.verified_stmt("SELECT TOP 3 * FROM tbl"); - dialects.one_statement_parses_to("SELECT TOP 3 ALL * FROM tbl", "SELECT TOP 3 * FROM tbl"); + dialects.verified_stmt("SELECT TOP 3 ALL * FROM tbl"); dialects.verified_stmt("SELECT TOP 3 DISTINCT * FROM tbl"); dialects.verified_stmt("SELECT TOP 3 DISTINCT a, b, c FROM tbl"); } @@ -14833,14 +15280,23 @@ fn parse_comments() { _ => unreachable!(), } + // https://www.postgresql.org/docs/current/sql-comment.html let object_types = [ ("COLUMN", CommentObject::Column), + ("DATABASE", CommentObject::Database), + ("DOMAIN", CommentObject::Domain), ("EXTENSION", CommentObject::Extension), - ("TABLE", CommentObject::Table), + ("FUNCTION", CommentObject::Function), + ("INDEX", CommentObject::Index), + ("MATERIALIZED VIEW", CommentObject::MaterializedView), + ("PROCEDURE", CommentObject::Procedure), + ("ROLE", CommentObject::Role), ("SCHEMA", CommentObject::Schema), - ("DATABASE", CommentObject::Database), + ("SEQUENCE", CommentObject::Sequence), + ("TABLE", CommentObject::Table), + ("TYPE", CommentObject::Type), ("USER", CommentObject::User), - ("ROLE", CommentObject::Role), + ("VIEW", CommentObject::View), ]; for (keyword, expected_object_type) in object_types.iter() { match all_dialects_where(|d| d.supports_comment_on()) @@ -14941,6 +15397,51 @@ fn test_reserved_keywords_for_identifiers() { dialects.parse_sql_statements(sql).unwrap(); } +#[test] +fn test_keywords_as_column_names_after_dot() { + // Test various keywords that have special meaning when standalone + // but should be treated as identifiers after a dot. 
+ let keywords = [ + "interval", // INTERVAL '1' DAY + "case", // CASE WHEN ... END + "cast", // CAST(x AS y) + "extract", // EXTRACT(DAY FROM ...) + "trim", // TRIM(...) + "substring", // SUBSTRING(...) + "left", // LEFT(str, n) + "right", // RIGHT(str, n) + ]; + + for kw in keywords { + let sql = format!("SELECT T.{kw} FROM T"); + verified_stmt(&sql); + + let sql = format!("SELECT SUM(x) OVER (PARTITION BY T.{kw} ORDER BY T.id) FROM T"); + verified_stmt(&sql); + + let sql = format!("SELECT T.{kw}, S.{kw} FROM T, S WHERE T.{kw} = S.{kw}"); + verified_stmt(&sql); + } + + let select = verified_only_select("SELECT T.interval, T.case FROM T"); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::CompoundIdentifier(idents)) => { + assert_eq!(idents.len(), 2); + assert_eq!(idents[0].value, "T"); + assert_eq!(idents[1].value, "interval"); + } + _ => panic!("Expected CompoundIdentifier for T.interval"), + } + match &select.projection[1] { + SelectItem::UnnamedExpr(Expr::CompoundIdentifier(idents)) => { + assert_eq!(idents.len(), 2); + assert_eq!(idents[0].value, "T"); + assert_eq!(idents[1].value, "case"); + } + _ => panic!("Expected CompoundIdentifier for T.case"), + } +} + #[test] fn parse_create_table_with_bit_types() { let sql = "CREATE TABLE t (a BIT, b BIT VARYING, c BIT(42), d BIT VARYING(43))"; @@ -15193,6 +15694,34 @@ fn overflow() { let statement = statements.pop().unwrap(); assert_eq!(statement.to_string(), sql); } + +#[test] +fn parse_deeply_nested_boolean_expr_does_not_stackoverflow() { + fn build_nested_expr(depth: usize) -> String { + if depth == 0 { + return "x = 1".to_string(); + } + format!( + "({} OR {} AND ({}))", + build_nested_expr(0), + build_nested_expr(0), + build_nested_expr(depth - 1) + ) + } + + let depth = 200; + let where_clause = build_nested_expr(depth); + let sql = format!("SELECT pk FROM tab0 WHERE {where_clause}"); + + let mut statements = Parser::new(&GenericDialect {}) + .try_with_sql(&sql) + .expect("tokenize to work") + 
.with_recursion_limit(depth * 10) + .parse_statements() + .unwrap(); + let statement = statements.pop().unwrap(); + assert_eq!(statement.to_string(), sql); +} #[test] fn parse_select_without_projection() { let dialects = all_dialects_where(|d| d.supports_empty_projections()); @@ -15589,7 +16118,16 @@ fn test_lambdas() { ] ), Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::Many(vec![Ident::new("p1"), Ident::new("p2")]), + params: OneOrManyWithParens::Many(vec![ + LambdaFunctionParameter { + name: Ident::new("p1"), + data_type: None + }, + LambdaFunctionParameter { + name: Ident::new("p2"), + data_type: None + } + ]), body: Box::new(Expr::Case { case_token: AttachedToken::empty(), end_token: AttachedToken::empty(), @@ -15622,7 +16160,8 @@ fn test_lambdas() { }, ], else_result: Some(Box::new(Expr::value(number("1")))), - }) + }), + syntax: LambdaSyntax::Arrow, }) ] )), @@ -15633,6 +16172,12 @@ fn test_lambdas() { "map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2))", ); dialects.verified_expr("transform(array(1, 2, 3), x -> x + 1)"); + + // Ensure all lambda variants are parsed correctly + dialects.verified_expr("a -> a * 2"); // Single parameter without type + dialects.verified_expr("a INT -> a * 2"); // Single parameter with type + dialects.verified_expr("(a, b) -> a * b"); // Multiple parameters without types + dialects.verified_expr("(a INT, b FLOAT) -> a * b"); // Multiple parameters with types } #[test] @@ -15654,7 +16199,9 @@ fn test_select_from_first() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, projection, exclude: None, @@ -15680,7 +16227,7 @@ fn test_select_from_first() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor, }))), order_by: None, @@ -16099,303 +16646,288 @@ fn parse_set_names() { } #[test] -fn 
parse_pipeline_operator() { +fn parse_pipe_operator_as() { let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> AS new_users"); +} - // select pipe operator - dialects.verified_stmt("SELECT * FROM users |> SELECT id"); - dialects.verified_stmt("SELECT * FROM users |> SELECT id, name"); +#[test] +fn parse_pipe_operator_select() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> SELECT id"); + dialects.verified_stmt("SELECT * FROM tbl |> SELECT id, name"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> SELECT id user_id", - "SELECT * FROM users |> SELECT id AS user_id", + "SELECT * FROM tbl |> SELECT id user_id", + "SELECT * FROM tbl |> SELECT id AS user_id", ); - dialects.verified_stmt("SELECT * FROM users |> SELECT id AS user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> SELECT id AS user_id"); +} - // extend pipe operator - dialects.verified_stmt("SELECT * FROM users |> EXTEND id + 1 AS new_id"); - dialects.verified_stmt("SELECT * FROM users |> EXTEND id AS new_id, name AS new_name"); +#[test] +fn parse_pipe_operator_extend() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> EXTEND id + 1 AS new_id"); + dialects.verified_stmt("SELECT * FROM tbl |> EXTEND id AS new_id, name AS new_name"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> EXTEND id user_id", - "SELECT * FROM users |> EXTEND id AS user_id", + "SELECT * FROM tbl |> EXTEND id user_id", + "SELECT * FROM tbl |> EXTEND id AS user_id", ); +} - // set pipe operator - dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1"); - dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1, name = name + ' Doe'"); - - // drop pipe operator - dialects.verified_stmt("SELECT * FROM users |> DROP id"); - dialects.verified_stmt("SELECT * FROM users |> DROP id, name"); 
+#[test] +fn parse_pipe_operator_set() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> SET id = id + 1"); + dialects.verified_stmt("SELECT * FROM tbl |> SET id = id + 1, name = name + ' Doe'"); +} - // as pipe operator - dialects.verified_stmt("SELECT * FROM users |> AS new_users"); +#[test] +fn parse_pipe_operator_drop() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> DROP id"); + dialects.verified_stmt("SELECT * FROM tbl |> DROP id, name"); + dialects.verified_stmt("SELECT * FROM tbl |> DROP c |> RENAME a AS x"); + dialects.verified_stmt("SELECT * FROM tbl |> DROP a, b |> SELECT c"); +} - // limit pipe operator - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10"); - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 OFFSET 5"); - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> LIMIT 5"); - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> WHERE true"); +#[test] +fn parse_pipe_operator_limit() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> LIMIT 10"); + dialects.verified_stmt("SELECT * FROM tbl |> LIMIT 10 OFFSET 5"); + dialects.verified_stmt("SELECT * FROM tbl |> LIMIT 10 |> LIMIT 5"); + dialects.verified_stmt("SELECT * FROM tbl |> LIMIT 10 |> WHERE true"); +} - // where pipe operator - dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1"); - dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 AND name = 'John'"); - dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 OR name = 'John'"); +#[test] +fn parse_pipe_operator_where() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE id = 1"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE id = 1 AND name = 'John'"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE id = 1 OR 
name = 'John'"); +} - // aggregate pipe operator full table - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*)"); +#[test] +fn parse_pipe_operator_aggregate() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE COUNT(*)"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> AGGREGATE COUNT(*) total_users", - "SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users", - ); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*), MIN(id)"); - - // aggregate pipe opeprator with grouping - dialects.verified_stmt( - "SELECT * FROM users |> AGGREGATE SUM(o_totalprice) AS price, COUNT(*) AS cnt GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + "SELECT * FROM tbl |> AGGREGATE COUNT(*) total_users", + "SELECT * FROM tbl |> AGGREGATE COUNT(*) AS total_users", ); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE COUNT(*) AS total_users"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE COUNT(*), MIN(id)"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(o_totalprice) AS price, COUNT(*) AS cnt GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year"); dialects.verified_stmt( - "SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + "SELECT * FROM tbl |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", ); dialects - .verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate)"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY a, b"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) GROUP BY a, b"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) ASC"); + .verified_stmt("SELECT * FROM tbl |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate)"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE GROUP BY a, b"); + 
dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(c) GROUP BY a, b"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(c) ASC"); +} - // order by pipe operator - dialects.verified_stmt("SELECT * FROM users |> ORDER BY id ASC"); - dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC"); - dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC"); +#[test] +fn parse_pipe_operator_order_by() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> ORDER BY id ASC"); + dialects.verified_stmt("SELECT * FROM tbl |> ORDER BY id DESC"); + dialects.verified_stmt("SELECT * FROM tbl |> ORDER BY id DESC, name ASC"); +} - // tablesample pipe operator +#[test] +fn parse_pipe_operator_tablesample() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)"); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)"); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); +} - // rename pipe operator - dialects.verified_stmt("SELECT * FROM users |> RENAME old_name AS new_name"); - dialects.verified_stmt("SELECT * FROM users |> RENAME id AS user_id, name AS user_name"); +#[test] +fn parse_pipe_operator_rename() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> RENAME old_name AS new_name"); + dialects.verified_stmt("SELECT * FROM tbl |> RENAME id AS user_id, name AS user_name"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> RENAME id user_id", - "SELECT * FROM users |> RENAME id AS user_id", + "SELECT * FROM tbl |> RENAME id user_id", + "SELECT * FROM tbl |> RENAME id AS user_id", ); +} - // union pipe operator - dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins)"); - dialects.verified_stmt("SELECT * FROM users |> UNION 
DISTINCT (SELECT * FROM admins)"); - dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins)"); - - // union pipe operator with multiple queries - dialects.verified_stmt( - "SELECT * FROM users |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)", - ); - dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM guests), (SELECT * FROM employees)"); +#[test] +fn parse_pipe_operator_union() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> UNION ALL (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION DISTINCT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION (SELECT * FROM admins)"); dialects.verified_stmt( - "SELECT * FROM users |> UNION (SELECT * FROM admins), (SELECT * FROM guests)", + "SELECT * FROM tbl |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)", ); - - // union pipe operator with BY NAME modifier - dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins)"); - dialects.verified_stmt("SELECT * FROM users |> UNION ALL BY NAME (SELECT * FROM admins)"); - dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); - - // union pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM tbl |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM guests), (SELECT * FROM employees)"); + dialects + .verified_stmt("SELECT * FROM tbl |> UNION (SELECT * FROM admins), (SELECT * FROM guests)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION ALL BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); dialects.verified_stmt( - "SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)", + 
"SELECT * FROM tbl |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)", ); +} - // intersect pipe operator (BigQuery requires DISTINCT modifier for INTERSECT) - dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins)"); - - // intersect pipe operator with BY NAME modifier +#[test] +fn parse_pipe_operator_intersect() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> INTERSECT DISTINCT (SELECT * FROM admins)"); dialects - .verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); - - // intersect pipe operator with multiple queries + .verified_stmt("SELECT * FROM tbl |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); dialects.verified_stmt( - "SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + "SELECT * FROM tbl |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", ); + dialects.verified_stmt("SELECT * FROM tbl |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); +} - // intersect pipe operator with BY NAME and multiple queries - dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); - - // except pipe operator (BigQuery requires DISTINCT modifier for EXCEPT) - dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins)"); - - // except pipe operator with BY NAME modifier - dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); - - // except pipe operator with multiple queries +#[test] +fn parse_pipe_operator_except() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> EXCEPT DISTINCT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); dialects.verified_stmt( - 
"SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + "SELECT * FROM tbl |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", ); + dialects.verified_stmt("SELECT * FROM tbl |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); +} - // except pipe operator with BY NAME and multiple queries - dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); - - // call pipe operator - dialects.verified_stmt("SELECT * FROM users |> CALL my_function()"); - dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5, 'test')"); +#[test] +fn parse_pipe_operator_call() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> CALL my_function()"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL process_data(5, 'test')"); + dialects + .verified_stmt("SELECT * FROM tbl |> CALL namespace.function_name(col1, col2, 'literal')"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL transform_data(col1 + col2)"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL analyze_data('param1', 100, true)"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL tvf1(arg1) AS al"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL process_data(5) AS result_table"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL namespace.func() AS my_alias"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); dialects.verified_stmt( - "SELECT * FROM users |> CALL namespace.function_name(col1, col2, 'literal')", + "SELECT * FROM tbl |> CALL transform(col1) |> CALL validate() |> CALL process(param)", ); - - // call pipe operator with complex arguments - dialects.verified_stmt("SELECT * FROM users |> CALL transform_data(col1 + col2)"); - dialects.verified_stmt("SELECT * FROM users |> CALL analyze_data('param1', 100, true)"); - - // call pipe operator with aliases - 
dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) AS al"); - dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5) AS result_table"); - dialects.verified_stmt("SELECT * FROM users |> CALL namespace.func() AS my_alias"); - - // multiple call pipe operators in sequence - dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); + dialects + .verified_stmt("SELECT * FROM tbl |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2"); dialects.verified_stmt( - "SELECT * FROM data |> CALL transform(col1) |> CALL validate() |> CALL process(param)", + "SELECT * FROM tbl |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results", ); - - // multiple call pipe operators with aliases dialects.verified_stmt( - "SELECT * FROM input_table |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2", + "SELECT * FROM tbl |> CALL transform() |> WHERE status = 'active' |> CALL process(param)", ); dialects.verified_stmt( - "SELECT * FROM data |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results", + "SELECT * FROM tbl |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()", ); +} - // call pipe operators mixed with other pipe operators +#[test] +fn parse_pipe_operator_pivot() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); dialects.verified_stmt( - "SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)", + "SELECT * FROM tbl |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))", ); dialects.verified_stmt( - "SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()", + "SELECT * FROM tbl |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))", ); - - // pivot pipe operator + dialects.verified_stmt("SELECT * FROM tbl |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); + dialects.verified_stmt("SELECT * FROM tbl |> 
PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); dialects.verified_stmt( - "SELECT * FROM monthly_sales |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))", + "SELECT * FROM tbl |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", ); - dialects.verified_stmt("SELECT * FROM sales_data |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))"); - - // pivot pipe operator with multiple aggregate functions - dialects.verified_stmt("SELECT * FROM data |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); - - // pivot pipe operator with compound column names - dialects.verified_stmt("SELECT * FROM sales |> PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); - - // pivot pipe operator mixed with other pipe operators - dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); - - // pivot pipe operator with aliases - dialects.verified_stmt("SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales"); - dialects.verified_stmt("SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category"); - dialects.verified_stmt("SELECT * FROM sales |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); - - // pivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_stmt( + "SELECT * FROM tbl |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", + ); + dialects.verified_stmt("SELECT * FROM tbl |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); dialects.verified_query_with_canonical( - "SELECT * FROM monthly_sales |> 
PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", - "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", + "SELECT * FROM tbl |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", + "SELECT * FROM tbl |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", ); dialects.verified_query_with_canonical( - "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) avg_by_category", - "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", + "SELECT * FROM tbl |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) avg_by_category", + "SELECT * FROM tbl |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", ); +} - // unpivot pipe operator basic usage - dialects - .verified_stmt("SELECT * FROM sales |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); - dialects.verified_stmt("SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C))"); +#[test] +fn parse_pipe_operator_unpivot() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + dialects.verified_stmt("SELECT * FROM tbl |> UNPIVOT(value FOR category IN (A, B, C))"); dialects.verified_stmt( - "SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))", + "SELECT * FROM tbl |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))", ); - - // unpivot pipe operator with multiple columns - dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))"); dialects.verified_stmt( - "SELECT * FROM report |> UNPIVOT(score FOR subject IN (math, science, english, history))", + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))", ); - - // unpivot pipe operator mixed with other pipe operators - dialects.verified_stmt("SELECT * FROM 
sales_data |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); - - // unpivot pipe operator with aliases - dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales"); dialects.verified_stmt( - "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", - ); - dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory)) AS metric_measurements"); - - // unpivot pipe operator with implicit aliases (without AS keyword) - dialects.verified_query_with_canonical( - "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", - "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", - ); - dialects.verified_query_with_canonical( - "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) transformed_data", - "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + "SELECT * FROM tbl |> UNPIVOT(score FOR subject IN (math, science, english, history))", ); - - // many pipes + dialects.verified_stmt("SELECT * FROM tbl |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); dialects.verified_stmt( - "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", ); - - // join pipe operator - INNER JOIN - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt("SELECT * FROM users |> INNER JOIN orders ON users.id = orders.user_id"); - - // join pipe operator - LEFT JOIN - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = 
orders.user_id"); dialects.verified_stmt( - "SELECT * FROM users |> LEFT OUTER JOIN orders ON users.id = orders.user_id", + "SELECT * FROM tbl |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", ); - - // join pipe operator - RIGHT JOIN - dialects.verified_stmt("SELECT * FROM users |> RIGHT JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt( - "SELECT * FROM users |> RIGHT OUTER JOIN orders ON users.id = orders.user_id", + dialects.verified_stmt("SELECT * FROM tbl |> UNPIVOT(measurement FOR metric_type IN (cpu, memory)) AS metric_measurements"); + dialects.verified_query_with_canonical( + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", ); - - // join pipe operator - FULL JOIN - dialects.verified_stmt("SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> FULL OUTER JOIN orders ON users.id = orders.user_id", - "SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id", + "SELECT * FROM tbl |> UNPIVOT(value FOR category IN (A, B, C)) transformed_data", + "SELECT * FROM tbl |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", ); +} - // join pipe operator - CROSS JOIN - dialects.verified_stmt("SELECT * FROM users |> CROSS JOIN orders"); - - // join pipe operator with USING +#[test] +fn parse_pipe_operator_join() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> INNER JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects + .verified_stmt("SELECT * FROM tbl |> LEFT OUTER JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> RIGHT 
JOIN orders ON users.id = orders.user_id"); + dialects + .verified_stmt("SELECT * FROM tbl |> RIGHT OUTER JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> FULL JOIN orders ON users.id = orders.user_id"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> JOIN orders USING (user_id)", - "SELECT * FROM users |> JOIN orders USING(user_id)", + "SELECT * FROM tbl |> FULL OUTER JOIN orders ON users.id = orders.user_id", + "SELECT * FROM tbl |> FULL JOIN orders ON users.id = orders.user_id", ); + dialects.verified_stmt("SELECT * FROM tbl |> CROSS JOIN orders"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> LEFT JOIN orders USING (user_id, order_date)", - "SELECT * FROM users |> LEFT JOIN orders USING(user_id, order_date)", + "SELECT * FROM tbl |> JOIN orders USING (user_id)", + "SELECT * FROM tbl |> JOIN orders USING(user_id)", ); - - // join pipe operator with alias (with an omitted "AS" keyword) dialects.verified_query_with_canonical( - "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", - "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", + "SELECT * FROM tbl |> LEFT JOIN orders USING (user_id, order_date)", + "SELECT * FROM tbl |> LEFT JOIN orders USING(user_id, order_date)", ); - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders AS o ON users.id = o.user_id"); - - // join pipe operator with complex ON condition - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); - - // multiple join pipe operators - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON 
orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders o ON users.id = o.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders AS o ON users.id = o.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id |> SELECT users.name, orders.amount"); +} - // join pipe operator with other pipe operators - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); - dialects.verified_stmt("SELECT * FROM users |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> SELECT users.name, orders.amount"); +#[test] +fn parse_pipe_operator_chained() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC"); } #[test] @@ -16704,10 +17236,12 @@ fn 
parse_truncate_only() { TruncateTableTarget { name: ObjectName::from(vec![Ident::new("employee")]), only: false, + has_asterisk: false, }, TruncateTableTarget { name: ObjectName::from(vec![Ident::new("dept")]), only: true, + has_asterisk: false, }, ]; @@ -16716,6 +17250,7 @@ fn parse_truncate_only() { table_names, partitions: None, table: true, + if_exists: false, identity: None, cascade: None, on_cluster: None, @@ -16731,6 +17266,15 @@ fn check_enforced() { ); } +#[test] +fn column_check_enforced() { + all_dialects().verified_stmt("CREATE TABLE t (x INT CHECK (x > 1) NOT ENFORCED)"); + all_dialects().verified_stmt("CREATE TABLE t (x INT CHECK (x > 1) ENFORCED)"); + all_dialects().verified_stmt( + "CREATE TABLE t (a INT CHECK (a > 0) NOT ENFORCED, b INT CHECK (b > 0) ENFORCED, c INT CHECK (c > 0))", + ); +} + #[test] fn join_precedence() { all_dialects_except(|d| !d.supports_left_associative_joins_without_parens()) @@ -16927,7 +17471,9 @@ fn test_select_exclude() { SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. }) => { assert_eq!( *opt_exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); } _ => unreachable!(), @@ -16940,8 +17486,8 @@ fn test_select_exclude() { assert_eq!( *opt_exclude, Some(ExcludeSelectItem::Multiple(vec![ - Ident::new("c1"), - Ident::new("c2") + ObjectName::from(Ident::new("c1")), + ObjectName::from(Ident::new("c2")), ])) ); } @@ -16952,7 +17498,9 @@ fn test_select_exclude() { SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. 
}) => { assert_eq!( *opt_exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); } _ => unreachable!(), @@ -16974,7 +17522,9 @@ fn test_select_exclude() { } assert_eq!( select.exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); let dialects = all_dialects_where(|d| { @@ -16985,7 +17535,9 @@ fn test_select_exclude() { SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. }) => { assert_eq!( *opt_exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); } _ => unreachable!(), @@ -17022,6 +17574,32 @@ fn test_select_exclude() { ); } +#[test] +fn test_select_exclude_qualified_names() { + // EXCLUDE should accept qualified names like `f.col` parsed as ObjectName. + let dialects = all_dialects_where(|d| d.supports_select_wildcard_exclude()); + + // Qualified name in multi-column EXCLUDE list: f.* EXCLUDE (f.col1, f.col2) + let select = dialects + .verified_only_select("SELECT f.* EXCLUDE (f.account_canonical_id, f.amount) FROM t AS f"); + match &select.projection[0] { + SelectItem::QualifiedWildcard(_, WildcardAdditionalOptions { opt_exclude, .. }) => { + assert_eq!( + *opt_exclude, + Some(ExcludeSelectItem::Multiple(vec![ + ObjectName::from(vec![Ident::new("f"), Ident::new("account_canonical_id")]), + ObjectName::from(vec![Ident::new("f"), Ident::new("amount")]), + ])) + ); + } + _ => unreachable!(), + } + + // Plain identifiers must still parse successfully. 
+ dialects.verified_only_select("SELECT f.* EXCLUDE (account_canonical_id) FROM t AS f"); + dialects.verified_only_select("SELECT f.* EXCLUDE (col1, col2) FROM t AS f"); +} + #[test] fn test_no_semicolon_required_between_statements() { let sql = r#" @@ -17143,19 +17721,21 @@ fn parse_create_user() { options: vec![ KeyValueOption { option_name: "PASSWORD".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "secret".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("secret".to_string()).with_empty_span() + ), }, KeyValueOption { option_name: "MUST_CHANGE_PASSWORD".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(false)), + option_value: KeyValueOptionKind::Single( + Value::Boolean(false).with_empty_span() + ), }, KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "SERVICE".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("SERVICE".to_string()).with_empty_span() + ), }, ], }, @@ -17168,15 +17748,15 @@ fn parse_create_user() { options: vec![ KeyValueOption { option_name: "t1".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "v1".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("v1".to_string()).with_empty_span() + ), }, KeyValueOption { option_name: "t2".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "v2".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("v2".to_string()).with_empty_span() + ), }, ] } @@ -17245,6 +17825,12 @@ fn test_parse_not_null_in_column_options() { ); } +#[test] +fn test_parse_default_expr_with_operators() { + all_dialects().verified_stmt("CREATE TABLE t (c INT DEFAULT (1 + 2) + 3)"); + all_dialects().verified_stmt("CREATE TABLE t (c INT DEFAULT (1 + 2) + 3 NOT NULL)"); +} + #[test] fn 
test_parse_default_with_collate_column_option() { let sql = "CREATE TABLE foo (abc TEXT DEFAULT 'foo' COLLATE 'en_US')"; @@ -17401,6 +17987,9 @@ fn parse_copy_options() { "EMPTYASNULL ", "IAM_ROLE DEFAULT ", "IGNOREHEADER AS 1 ", + "JSON ", + "JSON 'auto' ", + "JSON AS 'auto' ", "TIMEFORMAT AS 'auto' ", "TRUNCATECOLUMNS ", "REMOVEQUOTES ", @@ -17426,6 +18015,9 @@ fn parse_copy_options() { "EMPTYASNULL ", "IAM_ROLE DEFAULT ", "IGNOREHEADER 1 ", + "JSON ", + "JSON AS 'auto' ", + "JSON AS 'auto' ", "TIMEFORMAT 'auto' ", "TRUNCATECOLUMNS ", "REMOVEQUOTES ", @@ -17584,6 +18176,25 @@ fn parse_adjacent_string_literal_concatenation() { let sql = "SELECT * FROM t WHERE col = 'Hello' \n ' ' \t 'World!'"; dialects.one_statement_parses_to(sql, r"SELECT * FROM t WHERE col = 'Hello World!'"); + + let dialects = all_dialects_where(|d| d.supports_string_literal_concatenation_with_newline()); + let sql = r#" + SELECT 'abc' in ('a' + 'b' + 'c', + 'd' + )"#; + dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')"); + + let sql = r#" + SELECT 'abc' in ('a' + 'b' + -- COMMENT + 'c', + -- COMMENT + 'd' + )"#; + dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')"); } #[test] @@ -17771,9 +18382,9 @@ fn test_parse_alter_user() { alter.set_tag.options, vec![KeyValueOption { option_name: "k1".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "v1".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("v1".to_string()).with_empty_span() + ), },] ); } @@ -17807,17 +18418,21 @@ fn test_parse_alter_user() { options: vec![ KeyValueOption { option_name: "PASSWORD".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "secret".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("secret".to_string()).with_empty_span() + ), }, KeyValueOption { option_name: "MUST_CHANGE_PASSWORD".to_string(), - option_value: 
KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single( + Value::Boolean(true).with_empty_span() + ), }, KeyValueOption { option_name: "MINS_TO_UNLOCK".to_string(), - option_value: KeyValueOptionKind::Single(number("10")), + option_value: KeyValueOptionKind::Single( + number("10").with_empty_span() + ), }, ] } @@ -17844,7 +18459,8 @@ fn test_parse_alter_user() { option_name: "DEFAULT_SECONDARY_ROLES".to_string(), option_value: KeyValueOptionKind::Multi(vec![Value::SingleQuotedString( "ALL".to_string() - )]) + ) + .with_empty_span()]) }] ); } @@ -17868,9 +18484,9 @@ fn test_parse_alter_user() { options: vec![ KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "AWS".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AWS".to_string()).with_empty_span() + ), }, KeyValueOption { option_name: "ARN".to_string(), @@ -17878,6 +18494,7 @@ fn test_parse_alter_user() { Value::SingleQuotedString( "arn:aws:iam::123456789:r1/".to_string() ) + .with_empty_span() ), }, ] @@ -17888,6 +18505,15 @@ fn test_parse_alter_user() { _ => unreachable!(), } verified_stmt("ALTER USER u1 SET DEFAULT_SECONDARY_ROLES=('ALL'), PASSWORD='secret', WORKLOAD_IDENTITY=(TYPE=AWS, ARN='arn:aws:iam::123456789:r1/')"); + + verified_stmt("ALTER USER u1 PASSWORD 'AAA'"); + verified_stmt("ALTER USER u1 ENCRYPTED PASSWORD 'AAA'"); + verified_stmt("ALTER USER u1 PASSWORD NULL"); + + one_statement_parses_to( + "ALTER USER u1 WITH PASSWORD 'AAA'", + "ALTER USER u1 PASSWORD 'AAA'", + ); } #[test] @@ -17956,3 +18582,222 @@ fn test_parse_set_session_authorization() { })) ); } + +#[test] +fn test_set_authorization_without_scope_errors() { + // This should return a parser error, not panic. + let res = parse_sql_statements("SET AUTHORIZATION TIME TIME"); + assert!( + res.is_err(), + "SET AUTHORIZATION without a scope modifier (e.g. 
SESSION) should error" + ); +} + +#[test] +fn parse_select_parenthesized_wildcard() { + // Test SELECT DISTINCT(*) which uses a parenthesized wildcard + // The parentheses are syntactic sugar and get normalized to just * + let sql = "SELECT DISTINCT (*) FROM table1"; + let canonical = "SELECT DISTINCT * FROM table1"; + let select = all_dialects().verified_only_select_with_canonical(sql, canonical); + assert_eq!(select.distinct, Some(Distinct::Distinct)); + assert_eq!(select.projection.len(), 1); + assert!(matches!(select.projection[0], SelectItem::Wildcard(_))); + + // Also test without spaces: SELECT DISTINCT(*) + let sql_no_spaces = "SELECT DISTINCT(*) FROM table1"; + let select2 = all_dialects().verified_only_select_with_canonical(sql_no_spaces, canonical); + assert_eq!(select2.distinct, Some(Distinct::Distinct)); + assert_eq!(select2.projection.len(), 1); + assert!(matches!(select2.projection[0], SelectItem::Wildcard(_))); +} + +#[test] +fn parse_overlap_as_bool_and() { + let dialects = all_dialects_where(|d| d.supports_double_ampersand_operator()); + dialects.one_statement_parses_to("SELECT x && y", "SELECT x AND y"); +} + +#[test] +fn test_parse_key_value_options_trailing_semicolon() { + one_statement_parses_to( + "CREATE USER u1 option1='value1' option2='value2';", + "CREATE USER u1 option1='value1' option2='value2'", + ); +} + +#[test] +fn test_binary_kw_as_cast() { + all_dialects_where(|d| d.supports_binary_kw_as_cast()) + .one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)"); +} + +#[test] +fn parse_semi_structured_data_traversal() { + let dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(SnowflakeDialect {}), + Box::new(DatabricksDialect {}), + ]); + + // most basic case + let sql = "SELECT a:b FROM t"; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + has_colon: true, 
+ path: vec![JsonPathElem::Dot { + key: "b".to_owned(), + quoted: false + }] + }, + }), + select.projection[0] + ); + + // identifier can be quoted + let sql = r#"SELECT a:"my long object key name" FROM t"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + has_colon: true, + path: vec![JsonPathElem::Dot { + key: "my long object key name".to_owned(), + quoted: true + }] + }, + }), + select.projection[0] + ); + + dialects.verified_stmt("SELECT a:b::INT FROM t"); + + // unquoted keywords are permitted in the object key + let sql = "SELECT a:select, a:from FROM t"; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![ + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + has_colon: true, + path: vec![JsonPathElem::Dot { + key: "select".to_owned(), + quoted: false + }] + }, + }), + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + has_colon: true, + path: vec![JsonPathElem::Dot { + key: "from".to_owned(), + quoted: false + }] + }, + }) + ], + select.projection + ); + + // multiple levels can be traversed + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo."bar".baz"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + has_colon: true, + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: true, + }, + JsonPathElem::Dot { + key: "baz".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); + + // dot and bracket notation can be mixed (starting with : case) + // 
https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo[0].bar"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + has_colon: true, + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Bracket { + key: Expr::value(number("0")), + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); +} + +#[test] +fn parse_array_subscript() { + let dialects = all_dialects_except(|d| { + d.is::() + || d.is::() + || d.is::() + || d.is::() + // Databricks uses `:` for JSON path access (high precedence), which conflicts + // with array slice syntax `arr[1:2]`. + || d.is::() + }); + + dialects.verified_stmt("SELECT arr[1]"); + dialects.verified_stmt("SELECT arr[:]"); + dialects.verified_stmt("SELECT arr[1:2]"); + dialects.verified_stmt("SELECT arr[1:2:4]"); + dialects.verified_stmt("SELECT arr[1:array_length(arr)]"); + dialects.verified_stmt("SELECT arr[array_length(arr) - 1:array_length(arr)]"); + dialects + .verified_stmt("SELECT arr[array_length(arr) - 2:array_length(arr) - 1:array_length(arr)]"); + + dialects.verified_stmt("SELECT arr[1][2]"); + dialects.verified_stmt("SELECT arr[:][:]"); +} + +#[test] +fn test_wildcard_func_arg() { + // Wildcard (*) and wildcard with EXCLUDE as a function argument. + // Documented for Snowflake's HASH function but parsed for any dialect that + // supports the wildcard-EXCLUDE select syntax. + let dialects = all_dialects_where(|d| d.supports_select_wildcard_exclude()); + + // Wildcard with EXCLUDE — canonical form has a space before the parenthesised column list. 
+ dialects.one_statement_parses_to( + "SELECT HASH(* EXCLUDE(col1)) FROM t", + "SELECT HASH(* EXCLUDE (col1)) FROM t", + ); + dialects.verified_expr("HASH(* EXCLUDE (col1))"); + dialects.verified_expr("HASH(* EXCLUDE (col1, col2))"); +} diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 3d2ca9d77f..d7e02ce4cc 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -95,8 +95,12 @@ fn test_databricks_exists() { ] ), Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::One(Ident::new("x")), - body: Box::new(Expr::IsNull(Box::new(Expr::Identifier(Ident::new("x"))))) + params: OneOrManyWithParens::One(LambdaFunctionParameter { + name: Ident::new("x"), + data_type: None + }), + body: Box::new(Expr::IsNull(Box::new(Expr::Identifier(Ident::new("x"))))), + syntax: LambdaSyntax::Arrow, }) ] ), @@ -131,7 +135,16 @@ fn test_databricks_lambdas() { ] ), Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::Many(vec![Ident::new("p1"), Ident::new("p2")]), + params: OneOrManyWithParens::Many(vec![ + LambdaFunctionParameter { + name: Ident::new("p1"), + data_type: None + }, + LambdaFunctionParameter { + name: Ident::new("p2"), + data_type: None + } + ]), body: Box::new(Expr::Case { case_token: AttachedToken::empty(), end_token: AttachedToken::empty(), @@ -164,7 +177,8 @@ fn test_databricks_lambdas() { }, ], else_result: Some(Box::new(Expr::value(number("1")))) - }) + }), + syntax: LambdaSyntax::Arrow, }) ] )), @@ -370,6 +384,7 @@ fn data_type_timestamp_ntz() { "created_at".into() )))), data_type: DataType::TimestampNtz(None), + array: false, format: None } ); @@ -390,6 +405,269 @@ fn data_type_timestamp_ntz() { } } +#[test] +fn parse_table_time_travel() { + all_dialects_where(|d| d.supports_table_versioning()) + .verified_only_select("SELECT 1 FROM t1 TIMESTAMP AS OF '2018-10-18T22:15:12.013Z'"); + + all_dialects_where(|d| d.supports_table_versioning()).verified_only_select( + "SELECT 1 FROM t1 TIMESTAMP AS OF 
CURRENT_TIMESTAMP() - INTERVAL 12 HOURS", + ); + + all_dialects_where(|d| d.supports_table_versioning()) + .verified_only_select("SELECT 1 FROM t1 VERSION AS OF 1"); + + assert!(databricks() + .parse_sql_statements("SELECT 1 FROM t1 FOR TIMESTAMP AS OF 'some_timestamp'") + .is_err()); + + assert!(all_dialects_where(|d| d.supports_table_versioning()) + .parse_sql_statements("SELECT 1 FROM t1 VERSION AS OF 1 - 2",) + .is_err()) +} + +#[test] +fn parse_optimize_table() { + // Basic OPTIMIZE (Databricks style - no TABLE keyword) + databricks().verified_stmt("OPTIMIZE my_table"); + databricks().verified_stmt("OPTIMIZE db.my_table"); + databricks().verified_stmt("OPTIMIZE catalog.db.my_table"); + + // With WHERE clause + databricks().verified_stmt("OPTIMIZE my_table WHERE date = '2023-01-01'"); + databricks() + .verified_stmt("OPTIMIZE my_table WHERE date >= '2023-01-01' AND date < '2023-02-01'"); + + // With ZORDER BY clause + databricks().verified_stmt("OPTIMIZE my_table ZORDER BY (col1)"); + databricks().verified_stmt("OPTIMIZE my_table ZORDER BY (col1, col2)"); + databricks().verified_stmt("OPTIMIZE my_table ZORDER BY (col1, col2, col3)"); + + // Combined WHERE and ZORDER BY + databricks().verified_stmt("OPTIMIZE my_table WHERE date = '2023-01-01' ZORDER BY (col1)"); + databricks() + .verified_stmt("OPTIMIZE my_table WHERE date >= '2023-01-01' ZORDER BY (col1, col2)"); + + // Verify AST structure + match databricks() + .verified_stmt("OPTIMIZE my_table WHERE date = '2023-01-01' ZORDER BY (col1, col2)") + { + Statement::OptimizeTable { + name, + has_table_keyword, + on_cluster, + partition, + include_final, + deduplicate, + predicate, + zorder, + } => { + assert_eq!(name.to_string(), "my_table"); + assert!(!has_table_keyword); + assert!(on_cluster.is_none()); + assert!(partition.is_none()); + assert!(!include_final); + assert!(deduplicate.is_none()); + assert!(predicate.is_some()); + assert_eq!( + zorder, + Some(vec![ + Expr::Identifier(Ident::new("col1")), + 
Expr::Identifier(Ident::new("col2")), + ]) + ); + } + _ => unreachable!(), + } + + // Negative cases + assert_eq!( + databricks() + .parse_sql_statements("OPTIMIZE my_table ZORDER BY") + .unwrap_err(), + ParserError::ParserError("Expected: (, found: EOF".to_string()) + ); + assert_eq!( + databricks() + .parse_sql_statements("OPTIMIZE my_table ZORDER BY ()") + .unwrap_err(), + ParserError::ParserError("Expected: an expression, found: )".to_string()) + ); +} + +#[test] +fn parse_create_table_partitioned_by() { + // Databricks allows PARTITIONED BY with just column names (referencing existing columns) + // https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html + + // Single partition column without type + databricks().verified_stmt("CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)"); + + // Multiple partition columns without types + databricks().verified_stmt( + "CREATE TABLE t (col1 STRING, col2 INT, col3 DATE) PARTITIONED BY (col1, col2)", + ); + + // Partition columns with types (new columns not in table spec) + databricks().verified_stmt("CREATE TABLE t (name STRING) PARTITIONED BY (year INT, month INT)"); + + // Mixed: some with types, some without + databricks() + .verified_stmt("CREATE TABLE t (id INT, name STRING) PARTITIONED BY (region, year INT)"); + + // Verify AST structure for column without type + match databricks().verified_stmt("CREATE TABLE t (col1 STRING) PARTITIONED BY (col1)") { + Statement::CreateTable(CreateTable { + name, + columns, + hive_distribution, + .. 
+ }) => { + assert_eq!(name.to_string(), "t"); + assert_eq!(columns.len(), 1); + assert_eq!(columns[0].name.to_string(), "col1"); + match hive_distribution { + HiveDistributionStyle::PARTITIONED { + columns: partition_cols, + } => { + assert_eq!(partition_cols.len(), 1); + assert_eq!(partition_cols[0].name.to_string(), "col1"); + assert_eq!(partition_cols[0].data_type, DataType::Unspecified); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } + + // Verify AST structure for column with type + match databricks().verified_stmt("CREATE TABLE t (name STRING) PARTITIONED BY (year INT)") { + Statement::CreateTable(CreateTable { + hive_distribution: + HiveDistributionStyle::PARTITIONED { + columns: partition_cols, + }, + .. + }) => { + assert_eq!(partition_cols.len(), 1); + assert_eq!(partition_cols[0].name.to_string(), "year"); + assert_eq!(partition_cols[0].data_type, DataType::Int(None)); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_databricks_struct_type() { + // Databricks uses colon-separated struct field syntax (colon is optional) + // https://docs.databricks.com/en/sql/language-manual/data-types/struct-type.html + + // Basic struct with colon syntax - parses to canonical form without colons + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT)", + "CREATE TABLE t (col1 STRUCT)", + ); + + // Nested array of struct (the original issue case) + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 ARRAY>)", + "CREATE TABLE t (col1 ARRAY>)", + ); + + // Multiple struct columns + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT, col2 STRUCT)", + "CREATE TABLE t (col1 STRUCT, col2 STRUCT)", + ); + + // Deeply nested structs + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT>)", + "CREATE TABLE t (col1 STRUCT>)", + ); + + // Struct with array field + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT, name: STRING>)", + "CREATE TABLE t (col1 STRUCT, name STRING>)", + 
); + + // Syntax without colons should also work (BigQuery compatible) + databricks().verified_stmt("CREATE TABLE t (col1 STRUCT)"); + + // Verify AST structure + match databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT)", + "CREATE TABLE t (col1 STRUCT)", + ) { + Statement::CreateTable(CreateTable { columns, .. }) => { + assert_eq!(columns.len(), 1); + assert_eq!(columns[0].name.to_string(), "col1"); + match &columns[0].data_type { + DataType::Struct(fields, StructBracketKind::AngleBrackets) => { + assert_eq!(fields.len(), 2); + assert_eq!( + fields[0].field_name.as_ref().map(|i| i.to_string()), + Some("field1".to_string()) + ); + assert_eq!(fields[0].field_type, DataType::String(None)); + assert_eq!( + fields[1].field_name.as_ref().map(|i| i.to_string()), + Some("field2".to_string()) + ); + assert_eq!(fields[1].field_type, DataType::Int(None)); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + +#[test] +fn parse_databricks_json_accessor() { + // Basic colon accessor — unquoted field names are case-insensitive + databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data"); + + // Unquoted field access is case-insensitive; bracket notation is case-sensitive. + databricks().verified_only_select( + "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data", + ); + + // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output. 
+ databricks().one_statement_parses_to( + "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data", + r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#, + ); + + // Dot notation + databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data"); + + // String-key bracket notation after a dot segment + databricks() + .verified_only_select("SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data"); + + // Integer-index bracket notation + databricks() + .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data"); + + // Wildcard [*] — including chained and mixed positions + databricks().verified_only_select( + "SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \ + raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \ + raw:store.basket[0][2].b AS subfield FROM store_data", + ); + + // Dot access following a wildcard bracket + databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM store_data"); + + // Double-colon cast — type keyword normalises to upper case + databricks().one_statement_parses_to( + "SELECT raw:store.bicycle.price::double FROM store_data", + "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", + ); +} + #[test] fn parse_semi_structured_data_traversal() { // basic case @@ -440,16 +718,16 @@ fn parse_semi_structured_data_traversal() { select.projection[0] ); - // asterisk for arrays + // colon bracket notation: a:['b'].c let sql = "SELECT a:['b'].c FROM t"; let select = databricks().verified_only_select(sql); assert_eq!( SelectItem::UnnamedExpr(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { - has_colon: true, + has_colon: false, path: vec![ - JsonPathElem::Bracket { + JsonPathElem::ColonBracket { key: Expr::value(Value::SingleQuotedString("b".to_owned())), }, JsonPathElem::Dot { diff --git a/tests/sqlparser_derive_dialect.rs 
b/tests/sqlparser_derive_dialect.rs new file mode 100644 index 0000000000..d60fa1e11d --- /dev/null +++ b/tests/sqlparser_derive_dialect.rs @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests for the `derive_dialect!` macro. 
+ +use sqlparser::derive_dialect; +use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect, PostgreSqlDialect}; +use sqlparser::parser::Parser; + +#[test] +fn test_method_overrides() { + derive_dialect!(EnhancedGenericDialect, GenericDialect, overrides = { + supports_order_by_all = true, + supports_triple_quoted_string = true, + }); + let dialect = EnhancedGenericDialect::new(); + + // Overridden methods + assert!(dialect.supports_order_by_all()); + assert!(dialect.supports_triple_quoted_string()); + + // Non-overridden retains base behavior + assert!(!dialect.supports_factorial_operator()); + + // Parsing works with the overrides + let result = Parser::new(&dialect) + .try_with_sql("SELECT '''value''' FROM t ORDER BY ALL") + .unwrap() + .parse_statements(); + + assert!(result.is_ok()); +} + +#[test] +fn test_preserve_type_id() { + // Check the override works and the parser recognizes it as the base type + derive_dialect!( + PreservedTypeDialect, + GenericDialect, + preserve_type_id = true, + overrides = { supports_order_by_all = true } + ); + let dialect = PreservedTypeDialect::new(); + let d: &dyn Dialect = &dialect; + + assert!(dialect.supports_order_by_all()); + assert!(d.is::()); +} + +#[test] +fn test_different_base_dialects() { + derive_dialect!( + EnhancedMySqlDialect, + MySqlDialect, + overrides = { supports_order_by_all = true } + ); + derive_dialect!(UniquePostgreSqlDialect, PostgreSqlDialect); + + let pg = UniquePostgreSqlDialect::new(); + let mysql = EnhancedMySqlDialect::new(); + + // Inherit different base behaviors + assert!(pg.supports_filter_during_aggregation()); // PostgreSQL feature + assert!(mysql.supports_string_literal_backslash_escape()); // MySQL feature + assert!(mysql.supports_order_by_all()); // Override + + // Each has unique TypeId + let pg_ref: &dyn Dialect = &pg; + let mysql_ref: &dyn Dialect = &mysql; + assert!(pg_ref.is::()); + assert!(!pg_ref.is::()); + assert!(mysql_ref.is::()); +} + +#[test] +fn 
test_identifier_quote_style_overrides() { + derive_dialect!( + BacktickGenericDialect, + GenericDialect, + overrides = { identifier_quote_style = '`' } + ); + derive_dialect!( + AnotherBacktickDialect, + GenericDialect, + overrides = { identifier_quote_style = '[' } + ); + derive_dialect!( + QuotelessPostgreSqlDialect, + PostgreSqlDialect, + preserve_type_id = true, + overrides = { identifier_quote_style = None } + ); + + // Char literal (auto-wrapped in Some) + assert_eq!( + BacktickGenericDialect::new().identifier_quote_style("x"), + Some('`') + ); + // Another char literal + assert_eq!( + AnotherBacktickDialect::new().identifier_quote_style("x"), + Some('[') + ); + // None (overrides PostgreSQL's default '"') + assert_eq!( + QuotelessPostgreSqlDialect::new().identifier_quote_style("x"), + None + ); +} diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 73a1afe260..df62685808 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -156,7 +156,9 @@ fn column_defs(statement: Statement) -> Vec { fn test_select_wildcard_with_exclude() { let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])), + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ObjectName::from( + Ident::new("col_a"), + )])), ..Default::default() }); assert_eq!(expected, select.projection[0]); @@ -166,7 +168,9 @@ fn test_select_wildcard_with_exclude() { let expected = SelectItem::QualifiedWildcard( SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("name")])), WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), + opt_exclude: Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "department_id", + )))), ..Default::default() }, ); @@ -176,8 +180,8 @@ fn test_select_wildcard_with_exclude() { 
.verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ - Ident::new("department_id"), - Ident::new("employee_id"), + ObjectName::from(Ident::new("department_id")), + ObjectName::from(Ident::new("employee_id")), ])), ..Default::default() }); @@ -266,7 +270,9 @@ fn test_select_union_by_name() { set_quantifier: *expected_quantifier, left: Box::::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], exclude: None, @@ -292,12 +298,14 @@ fn test_select_union_by_name() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), right: Box::::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], exclude: None, @@ -323,7 +331,7 @@ fn test_select_union_by_name() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), }); @@ -380,6 +388,7 @@ fn test_duckdb_specific_int_types() { Value::Number("123".parse().unwrap(), false).with_empty_span() )), data_type: data_type.clone(), + array: false, format: None, }, expr_from_projection(&select.projection[0]) @@ -700,6 +709,7 @@ fn test_duckdb_union_datatype() { transient: Default::default(), volatile: Default::default(), iceberg: Default::default(), + snapshot: false, dynamic: Default::default(), name: ObjectName::from(vec!["tbl1".into()]), columns: vec![ @@ -755,6 +765,8 @@ fn test_duckdb_union_datatype() { cluster_by: Default::default(), 
clustered_by: Default::default(), inherits: Default::default(), + partition_of: Default::default(), + for_values: Default::default(), strict: Default::default(), copy_grants: Default::default(), enable_schema_evolution: Default::default(), @@ -764,6 +776,7 @@ fn test_duckdb_union_datatype() { default_ddl_collation: Default::default(), with_aggregation_policy: Default::default(), with_row_access_policy: Default::default(), + with_storage_lifecycle_policy: Default::default(), with_tags: Default::default(), base_location: Default::default(), external_volume: Default::default(), @@ -777,6 +790,10 @@ fn test_duckdb_union_datatype() { refresh_mode: None, initialize: None, require_user: Default::default(), + diststyle: Default::default(), + distkey: Default::default(), + sortkey: Default::default(), + backup: Default::default(), }), stmt ); @@ -870,3 +887,22 @@ fn parse_extract_single_quotes() { let sql = "SELECT EXTRACT('month' FROM my_timestamp) FROM my_table"; duckdb().verified_stmt(sql); } + +#[test] +fn test_duckdb_lambda_function() { + // Test basic lambda with list_filter + let sql = "SELECT [3, 4, 5, 6].list_filter(lambda x : x > 4)"; + duckdb().verified_stmt(sql); + + // Test lambda with arrow syntax (also supported by DuckDB) + let sql_arrow = "SELECT list_filter([1, 2, 3], x -> x > 1)"; + duckdb().verified_stmt(sql_arrow); + + // Test lambda with multiple parameters (with index) + let sql_multi = "SELECT list_filter([1, 3, 1, 5], lambda x, i : x > i)"; + duckdb().verified_stmt(sql_multi); + + // Test lambda in list_transform + let sql_transform = "SELECT list_transform([1, 2, 3], lambda x : x * 2)"; + duckdb().verified_stmt(sql_transform); +} diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 386bab7f04..1b09485185 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -34,10 +34,12 @@ fn parse_table_create() { let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e 
TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; let serdeproperties = r#"CREATE EXTERNAL TABLE IF NOT EXISTS db.table (a STRING, b STRING, c STRING) PARTITIONED BY (d STRING, e STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde.config' WITH SERDEPROPERTIES ('prop_a' = 'a', 'prop_b' = 'b') STORED AS TEXTFILE LOCATION 's3://...' TBLPROPERTIES ('prop_c' = 'c')"#; + let externaltable = r#"CREATE EXTERNAL TABLE t (c INT)"#; hive().verified_stmt(sql); hive().verified_stmt(iof); hive().verified_stmt(serdeproperties); + hive().verified_stmt(externaltable); } #[test] diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 37e8e962f3..e8ed79492d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -141,7 +141,9 @@ fn parse_create_procedure() { pipe_operators: vec![], body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Value( @@ -162,7 +164,7 @@ fn parse_create_procedure() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) }))], @@ -253,7 +255,7 @@ fn parse_create_function() { default_expr: None, }, ]), - return_type: Some(DataType::Int(None)), + return_type: Some(FunctionReturnType::DataType(DataType::Int(None))), function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { begin_token: AttachedToken::empty(), statements: vec![Statement::Return(ReturnStatement { @@ -266,6 +268,8 @@ fn 
parse_create_function() { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], using: None, language: None, determinism_specifier: None, @@ -426,7 +430,7 @@ fn parse_create_function_parameter_default_values() { data_type: DataType::Int(None), default_expr: Some(Expr::Value((number("42")).with_empty_span())), },]), - return_type: Some(DataType::Int(None)), + return_type: Some(FunctionReturnType::DataType(DataType::Int(None))), function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { begin_token: AttachedToken::empty(), statements: vec![Statement::Return(ReturnStatement { @@ -439,6 +443,8 @@ fn parse_create_function_parameter_default_values() { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], using: None, language: None, determinism_specifier: None, @@ -490,7 +496,7 @@ fn parse_mssql_openjson() { json_expr: Expr::CompoundIdentifier( vec![Ident::new("A"), Ident::new("param"),] ), - json_path: Some(Value::SingleQuotedString("$.config".into())), + json_path: Some(Value::SingleQuotedString("$.config".into()).with_empty_span()), columns: vec![ OpenJsonTableColumn { name: Ident::new("kind"), @@ -652,7 +658,7 @@ fn parse_mssql_openjson() { json_expr: Expr::CompoundIdentifier( vec![Ident::new("A"), Ident::new("param"),] ), - json_path: Some(Value::SingleQuotedString("$.config".into())), + json_path: Some(Value::SingleQuotedString("$.config".into()).with_empty_span()), columns: vec![], alias: table_alias(true, "B") }, @@ -1344,7 +1350,9 @@ fn parse_substring_in_select() { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: Some(Distinct::Distinct), + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Substring { @@ -1384,7 +1392,7 @@ fn parse_substring_in_select() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: 
None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1501,7 +1509,9 @@ fn parse_mssql_declare() { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { @@ -1526,7 +1536,7 @@ fn parse_mssql_declare() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) })) @@ -1655,6 +1665,80 @@ fn test_parse_raiserror() { let _ = ms().verified_stmt(sql); } +#[test] +fn test_parse_throw() { + // THROW with arguments + let sql = r#"THROW 51000, 'Record does not exist.', 1"#; + let s = ms().verified_stmt(sql); + assert_eq!( + s, + Statement::Throw(ThrowStatement { + error_number: Some(Box::new(Expr::Value( + (Value::Number("51000".parse().unwrap(), false)).with_empty_span() + ))), + message: Some(Box::new(Expr::Value( + (Value::SingleQuotedString("Record does not exist.".to_string())).with_empty_span() + ))), + state: Some(Box::new(Expr::Value( + (Value::Number("1".parse().unwrap(), false)).with_empty_span() + ))), + }) + ); + + // THROW with variable references + let sql = r#"THROW @ErrorNumber, @ErrorMessage, @ErrorState"#; + let _ = ms().verified_stmt(sql); + + // Re-throw (no arguments) + let sql = r#"THROW"#; + let s = ms().verified_stmt(sql); + assert_eq!( + s, + Statement::Throw(ThrowStatement { + error_number: None, + message: None, + state: None, + }) + ); +} + +#[test] +fn test_parse_waitfor() { + // WAITFOR DELAY + let sql = "WAITFOR DELAY '00:00:05'"; + let stmt = ms_and_generic().verified_stmt(sql); + assert_eq!( + stmt, + Statement::WaitFor(WaitForStatement { + wait_type: WaitForType::Delay, + expr: Expr::Value( + (Value::SingleQuotedString("00:00:05".to_string())).with_empty_span() + ), + }) + ); + + // WAITFOR TIME + let sql = "WAITFOR TIME 
'14:30:00'"; + let stmt = ms_and_generic().verified_stmt(sql); + assert_eq!( + stmt, + Statement::WaitFor(WaitForStatement { + wait_type: WaitForType::Time, + expr: Expr::Value( + (Value::SingleQuotedString("14:30:00".to_string())).with_empty_span() + ), + }) + ); + + // WAITFOR DELAY with variable + let sql = "WAITFOR DELAY @WaitTime"; + let _ = ms_and_generic().verified_stmt(sql); + + // Error: WAITFOR without DELAY or TIME + let res = ms_and_generic().parse_sql_statements("WAITFOR '00:00:05'"); + assert!(res.is_err()); +} + #[test] fn parse_use() { let valid_object_names = [ @@ -1897,8 +1981,11 @@ fn parse_create_table_with_valid_options() { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, iceberg: false, + snapshot: false, copy_grants: false, enable_schema_evolution: None, change_tracking: None, @@ -1907,6 +1994,7 @@ fn parse_create_table_with_valid_options() { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: None, base_location: None, external_volume: None, @@ -1920,6 +2008,10 @@ fn parse_create_table_with_valid_options() { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, + sortkey: None, + backup: None, }) ); } @@ -2031,6 +2123,7 @@ fn parse_create_table_with_identity_column() { transient: false, volatile: false, iceberg: false, + snapshot: false, name: ObjectName::from(vec![Ident { value: "mytable".to_string(), quote_style: None, @@ -2064,6 +2157,8 @@ fn parse_create_table_with_identity_column() { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -2073,6 +2168,7 @@ fn parse_create_table_with_identity_column() { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: 
None, base_location: None, external_volume: None, @@ -2086,6 +2182,10 @@ fn parse_create_table_with_identity_column() { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, + sortkey: None, + backup: None, }), ); } @@ -2501,8 +2601,262 @@ fn test_tsql_no_semicolon_delimiter() { DECLARE @X AS NVARCHAR(MAX)='x' DECLARE @Y AS NVARCHAR(MAX)='y' "#; - let stmts = tsql().parse_sql_statements(sql).unwrap(); assert_eq!(stmts.len(), 2); assert!(stmts.iter().all(|s| matches!(s, Statement::Declare { .. }))); + + let sql = r#" +SELECT col FROM tbl +IF x=1 + SELECT 1 +ELSE + SELECT 2 + "#; + let stmts = tsql().parse_sql_statements(sql).unwrap(); + assert_eq!(stmts.len(), 2); + assert!(matches!(&stmts[0], Statement::Query(_))); + assert!(matches!(&stmts[1], Statement::If(_))); +} + +#[test] +fn test_sql_keywords_as_table_aliases() { + // Some keywords that should not be parsed as an alias implicitly or explicitly + let reserved_kws = vec!["IF", "ELSE"]; + for kw in reserved_kws { + for explicit in &["", "AS "] { + assert!(tsql() + .parse_sql_statements(&format!("SELECT * FROM tbl {explicit}{kw}")) + .is_err()); + } + } +} + +#[test] +fn test_sql_keywords_as_column_aliases() { + // Some keywords that should not be parsed as an alias implicitly or explicitly + let reserved_kws = vec!["IF", "ELSE"]; + for kw in reserved_kws { + for explicit in &["", "AS "] { + assert!(tsql() + .parse_sql_statements(&format!("SELECT col {explicit}{kw} FROM tbl")) + .is_err()); + } + } +} + +#[test] +fn parse_mssql_begin_end_block() { + // Single statement + let sql = "BEGIN SELECT 1; END"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + begin, + has_end_keyword, + statements, + transaction, + modifier, + .. 
+ } => { + assert!(begin); + assert!(has_end_keyword); + assert!(transaction.is_none()); + assert!(modifier.is_none()); + assert_eq!(statements.len(), 1); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // Multiple statements + let sql = "BEGIN SELECT 1; SELECT 2; END"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + statements, + has_end_keyword, + .. + } => { + assert!(has_end_keyword); + assert_eq!(statements.len(), 2); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // DML inside BEGIN/END + let sql = "BEGIN INSERT INTO t VALUES (1); UPDATE t SET x = 2; END"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + statements, + has_end_keyword, + .. + } => { + assert!(has_end_keyword); + assert_eq!(statements.len(), 2); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // BEGIN TRANSACTION still works + let sql = "BEGIN TRANSACTION"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + begin, + has_end_keyword, + transaction, + .. + } => { + assert!(begin); + assert!(!has_end_keyword); + assert!(transaction.is_some()); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } +} + +/// MSSQL supports `TRAN` as shorthand for `TRANSACTION`. +/// See +#[test] +fn parse_mssql_tran_shorthand() { + // BEGIN TRAN + let sql = "BEGIN TRAN"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + begin, + transaction, + has_end_keyword, + .. 
+ } => { + assert!(begin); + assert_eq!(*transaction, Some(BeginTransactionKind::Tran)); + assert!(!has_end_keyword); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // COMMIT TRAN normalizes to COMMIT (same as COMMIT TRANSACTION) + ms().one_statement_parses_to("COMMIT TRAN", "COMMIT"); + + // ROLLBACK TRAN normalizes to ROLLBACK (same as ROLLBACK TRANSACTION) + ms().one_statement_parses_to("ROLLBACK TRAN", "ROLLBACK"); +} + +#[test] +fn test_tsql_statement_keywords_not_implicit_aliases() { + // T-SQL statement-starting keywords must never be consumed as implicit + // aliases for a preceding SELECT item or table reference when using + // newline-delimited multi-statement scripts. + + // Without the fix, the parser would consume a statement-starting keyword + // as an implicit alias for the preceding SELECT item or table reference, + // then fail on the next token. Verify parsing succeeds and each input + // produces the expected number of statements. + + // Keywords that should not become implicit column aliases + let col_alias_cases: &[(&str, usize)] = &[ + ("select 1\ndeclare @x as int", 2), + ("select 1\nexec sp_who", 2), + ("select 1\ninsert into t values (1)", 2), + ("select 1\nupdate t set col=1", 2), + ("select 1\ndelete from t", 2), + ("select 1\ndrop table t", 2), + ("select 1\ncreate table t (id int)", 2), + ("select 1\nalter table t add col int", 2), + ("select 1\nreturn", 2), + ]; + for (sql, expected) in col_alias_cases { + let stmts = tsql() + .parse_sql_statements(sql) + .unwrap_or_else(|e| panic!("failed to parse {sql:?}: {e}")); + assert_eq!( + stmts.len(), + *expected, + "expected {expected} stmts for: {sql:?}" + ); + } + + // Keywords that should not become implicit table aliases + let tbl_alias_cases: &[(&str, usize)] = &[ + ("select * from t\ndeclare @x as int", 2), + ("select * from t\ndrop table t", 2), + ("select * from t\ncreate table u (id int)", 2), + ("select * from t\nexec sp_who", 2), + ]; + for (sql, expected) 
in tbl_alias_cases { + let stmts = tsql() + .parse_sql_statements(sql) + .unwrap_or_else(|e| panic!("failed to parse {sql:?}: {e}")); + assert_eq!( + stmts.len(), + *expected, + "expected {expected} stmts for: {sql:?}" + ); + } +} + +#[test] +fn test_exec_dynamic_sql() { + // EXEC (@sql) executes a dynamic SQL string held in a variable. + // It must parse as a single Execute statement and not attempt to parse + // parameters after the closing paren. + let stmts = tsql() + .parse_sql_statements("EXEC (@sql)") + .expect("EXEC (@sql) should parse"); + assert_eq!(stmts.len(), 1); + assert!( + matches!(&stmts[0], Statement::Execute { .. }), + "expected Execute, got: {:?}", + stmts[0] + ); + + // Verify that a statement following EXEC (@sql) on the next line is parsed + // as a separate statement and not consumed as a parameter. + let stmts = tsql() + .parse_sql_statements("EXEC (@sql)\nDROP TABLE #tmp") + .expect("EXEC (@sql) followed by DROP TABLE should parse"); + assert_eq!(stmts.len(), 2); +} + +// MSSQL OUTPUT clause on INSERT/UPDATE/DELETE +// https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql +#[test] +fn parse_mssql_insert_with_output() { + ms_and_generic().verified_stmt( + "INSERT INTO customers (name, email) OUTPUT INSERTED.id, INSERTED.name VALUES ('John', 'john@example.com')", + ); +} + +#[test] +fn parse_mssql_insert_with_output_into() { + ms_and_generic().verified_stmt( + "INSERT INTO customers (name, email) OUTPUT INSERTED.id, INSERTED.name INTO @new_ids VALUES ('John', 'john@example.com')", + ); +} + +#[test] +fn parse_mssql_delete_with_output() { + ms_and_generic().verified_stmt("DELETE FROM customers OUTPUT DELETED.* WHERE id = 1"); +} + +#[test] +fn parse_mssql_delete_with_output_into() { + ms_and_generic().verified_stmt( + "DELETE FROM customers OUTPUT DELETED.id, DELETED.name INTO @deleted_rows WHERE active = 0", + ); +} + +#[test] +fn parse_mssql_update_with_output() { + ms_and_generic().verified_stmt( + "UPDATE 
employees SET salary = salary * 1.1 OUTPUT INSERTED.id, DELETED.salary, INSERTED.salary WHERE department = 'Engineering'", + ); +} + +#[test] +fn parse_mssql_update_with_output_into() { + ms_and_generic().verified_stmt( + "UPDATE employees SET salary = salary * 1.1 OUTPUT INSERTED.id, DELETED.salary, INSERTED.salary INTO @changes WHERE department = 'Engineering'", + ); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e847d3edb7..269787c295 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -874,6 +874,25 @@ fn test_functional_key_part() { )), }), data_type: DataType::Unsigned, + array: false, + format: None, + })), + ); + assert_eq!( + index_column(mysql_and_generic().verified_stmt( + r#"CREATE TABLE t (jsoncol JSON, PRIMARY KEY ((CAST(col ->> '$.fields' AS UNSIGNED ARRAY)) ASC))"# + )), + Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col"))), + op: BinaryOperator::LongArrow, + right: Box::new(Expr::Value( + Value::SingleQuotedString("$.fields".to_string()).with_empty_span() + )), + }), + data_type: DataType::Unsigned, + array: true, format: None, })), ); @@ -925,6 +944,15 @@ fn parse_create_table_primary_and_unique_key_characteristic_test() { } } +#[test] +fn parse_create_table_column_key_options() { + mysql_and_generic().verified_stmt("CREATE TABLE foo (x INT UNIQUE KEY)"); + mysql_and_generic().one_statement_parses_to( + "CREATE TABLE foo (x INT KEY)", + "CREATE TABLE foo (x INT PRIMARY KEY)", + ); +} + #[test] fn parse_create_table_comment() { let without_equal = "CREATE TABLE foo (bar INT) COMMENT 'baz'"; @@ -1416,7 +1444,9 @@ fn parse_escaped_quote_identifiers_with_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: 
vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -1439,7 +1469,7 @@ fn parse_escaped_quote_identifiers_with_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1471,7 +1501,9 @@ fn parse_escaped_quote_identifiers_with_no_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -1494,7 +1526,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1518,8 +1550,9 @@ fn parse_escaped_backticks_with_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -1542,7 +1575,7 @@ fn parse_escaped_backticks_with_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1570,8 +1603,9 @@ fn parse_escaped_backticks_with_no_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -1594,7 +1628,7 @@ fn parse_escaped_backticks_with_no_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: 
SelectFlavor::Standard, }))), order_by: None, @@ -1879,7 +1913,13 @@ fn parse_simple_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert_eq!( Some(Box::new(Query { @@ -1944,7 +1984,13 @@ fn parse_ignore_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert!(ignore); assert_eq!( @@ -1994,7 +2040,13 @@ fn parse_priority_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert_eq!(priority, Some(HighPriority)); assert_eq!( @@ -2041,7 +2093,13 @@ fn parse_priority_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert_eq!(priority, Some(LowPriority)); assert_eq!( @@ -2089,7 +2147,10 @@ fn parse_insert_as() { TableObject::TableName(ObjectName::from(vec![Ident::with_quote('`', "table")])), table_name ); - assert_eq!(vec![Ident::with_quote('`', "date")], columns); + assert_eq!( + vec![ObjectName::from(Ident::with_quote('`', "date"))], + columns + ); let insert_alias = insert_alias.unwrap(); assert_eq!( @@ -2142,7 
+2203,10 @@ fn parse_insert_as() { table_name ); assert_eq!( - vec![Ident::with_quote('`', "id"), Ident::with_quote('`', "date")], + vec![ + ObjectName::from(Ident::with_quote('`', "id")), + ObjectName::from(Ident::with_quote('`', "date")) + ], columns ); let insert_alias = insert_alias.unwrap(); @@ -2204,7 +2268,13 @@ fn parse_replace_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert!(replace_into); assert_eq!(priority, Some(Delayed)); @@ -2298,12 +2368,12 @@ fn parse_insert_with_on_duplicate_update() { ); assert_eq!( vec![ - Ident::new("name"), - Ident::new("description"), - Ident::new("perm_create"), - Ident::new("perm_read"), - Ident::new("perm_update"), - Ident::new("perm_delete") + ObjectName::from(Ident::new("name")), + ObjectName::from(Ident::new("description")), + ObjectName::from(Ident::new("perm_create")), + ObjectName::from(Ident::new("perm_read")), + ObjectName::from(Ident::new("perm_update")), + ObjectName::from(Ident::new("perm_delete")) ], columns ); @@ -2390,8 +2460,9 @@ fn parse_select_with_numeric_prefix_column_name() { q.body, Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new( @@ -2417,7 +2488,7 @@ fn parse_select_with_numeric_prefix_column_name() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) ); @@ -2565,7 +2636,9 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { q.body, Box::new(SetExpr::Select(Box::new(Select { select_token: 
AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -2592,7 +2665,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) ); @@ -2614,7 +2687,10 @@ fn parse_insert_with_numeric_prefix_column_name() { TableObject::TableName(ObjectName::from(vec![Ident::new("s1"), Ident::new("t1")])), table_name ); - assert_eq!(vec![Ident::new("123col_$@length123")], columns); + assert_eq!( + vec![ObjectName::from(Ident::new("123col_$@length123"))], + columns + ); } _ => unreachable!(), } @@ -2632,8 +2708,10 @@ fn parse_update_with_joins() { returning, or: None, limit: None, + optimizer_hints, update_token: _, - }) => { + output: _, + }) if optimizer_hints.is_empty() => { assert_eq!( TableWithJoins { relation: TableFactor::Table { @@ -3197,7 +3275,9 @@ fn parse_substring_in_select() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: Some(Distinct::Distinct), + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Substring { @@ -3237,7 +3317,7 @@ fn parse_substring_in_select() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -3438,6 +3518,27 @@ fn parse_create_table_unallow_constraint_then_index() { assert!(mysql_and_generic().parse_sql_statements(sql).is_ok()); } +#[test] +fn parse_create_table_constraint_check_without_name() { + let dialects = all_dialects_where(|d| d.supports_constraint_keyword_without_name()); + dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT PRIMARY KEY (x))", + "CREATE TABLE t (x INT, PRIMARY KEY (x))", + ); 
+ dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT UNIQUE (x))", + "CREATE TABLE t (x INT, UNIQUE (x))", + ); + dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT FOREIGN KEY (x) REFERENCES t2(id))", + "CREATE TABLE t (x INT, FOREIGN KEY (x) REFERENCES t2(id))", + ); + dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT CHECK (x > 1))", + "CREATE TABLE t (x INT, CHECK (x > 1))", + ); +} + #[test] fn parse_create_table_with_fulltext_definition() { mysql_and_generic().verified_stmt("CREATE TABLE tb (id INT, FULLTEXT (id))"); @@ -3520,7 +3621,9 @@ fn parse_hex_string_introducer() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Prefixed { @@ -3545,7 +3648,7 @@ fn parse_hex_string_introducer() { qualify: None, value_table_mode: None, into: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -3573,6 +3676,14 @@ fn parse_div_infix() { mysql().verified_stmt(r#"SELECT 5 DIV 2"#); } +#[test] +fn parse_div_infix_propagates_parse_error() { + let err = mysql() + .parse_sql_statements("SELECT 5 DIV") + .expect_err("expected an error"); + assert_matches!(err, ParserError::ParserError(_)); +} + #[test] fn parse_drop_temporary_table() { let sql = "DROP TEMPORARY TABLE foo"; @@ -3699,14 +3810,14 @@ fn parse_json_table() { .relation, TableFactor::JsonTable { json_expr: Expr::Value((Value::SingleQuotedString("[1,2]".to_string())).with_empty_span()), - json_path: Value::SingleQuotedString("$[*]".to_string()), + json_path: Value::SingleQuotedString("$[*]".to_string()).with_empty_span(), columns: vec![ JsonTableColumn::Named(JsonTableNamedColumn { name: Ident::new("x"), r#type: DataType::Int(None), - path: Value::SingleQuotedString("$".to_string()), + path: 
Value::SingleQuotedString("$".to_string()).with_empty_span(), exists: false, - on_empty: Some(JsonTableColumnErrorHandling::Default(Value::SingleQuotedString("0".to_string()))), + on_empty: Some(JsonTableColumnErrorHandling::Default(Value::SingleQuotedString("0".to_string()).with_empty_span())), on_error: Some(JsonTableColumnErrorHandling::Null), }), ], @@ -3780,7 +3891,7 @@ fn parse_bitstring_literal() { fn parse_grant() { let sql = "GRANT ALL ON *.* TO 'jeffrey'@'%'"; let stmt = mysql().verified_stmt(sql); - if let Statement::Grant { + if let Statement::Grant(Grant { privileges, objects, grantees, @@ -3788,7 +3899,7 @@ fn parse_grant() { as_grantor: _, granted_by, current_grants: _, - } = stmt + }) = stmt { assert_eq!( privileges, @@ -3826,13 +3937,13 @@ fn parse_grant() { fn parse_revoke() { let sql = "REVOKE ALL ON db1.* FROM 'jeffrey'@'%'"; let stmt = mysql_and_generic().verified_stmt(sql); - if let Statement::Revoke { + if let Statement::Revoke(Revoke { privileges, objects, grantees, granted_by, cascade, - } = stmt + }) = stmt { assert_eq!( privileges, @@ -4096,6 +4207,14 @@ fn parse_cast_integers() { .expect_err("CAST doesn't allow display width"); } +#[test] +fn parse_cast_array() { + mysql().verified_expr("CAST(foo AS SIGNED ARRAY)"); + mysql() + .run_parser_method("CAST(foo AS ARRAY)", |p| p.parse_expr()) + .expect_err("ARRAY alone is not a type"); +} + #[test] fn parse_match_against_with_alias() { let sql = "SELECT tbl.ProjectID FROM surveys.tbl1 AS tbl WHERE MATCH (tbl.ReferenceID) AGAINST ('AAA' IN BOOLEAN MODE)"; @@ -4114,7 +4233,10 @@ fn parse_match_against_with_alias() { Ident::new("ReferenceID") ])] ); - assert_eq!(match_value, Value::SingleQuotedString("AAA".to_owned())); + assert_eq!( + match_value, + Value::SingleQuotedString("AAA".to_owned()).with_empty_span() + ); assert_eq!(opt_search_modifier, Some(SearchModifier::InBooleanMode)); } _ => unreachable!(), @@ -4239,6 +4361,187 @@ fn parse_straight_join() { .verified_stmt("SELECT a.*, b.* FROM 
table_a STRAIGHT_JOIN table_b AS b ON a.b_id = b.id"); } +#[test] +fn parse_distinctrow_to_distinct() { + mysql().one_statement_parses_to( + "SELECT DISTINCTROW * FROM employees", + "SELECT DISTINCT * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCTROW * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); +} + +#[test] +fn parse_select_straight_join() { + let select = mysql().verified_only_select( + "SELECT STRAIGHT_JOIN * FROM employees e JOIN dept_emp d ON e.emp_no = d.emp_no WHERE d.emp_no = 10001", + ); + assert!(select.select_modifiers.unwrap().straight_join); + + mysql().verified_stmt( + "SELECT STRAIGHT_JOIN e.emp_no, d.dept_no FROM employees e JOIN dept_emp d ON e.emp_no = d.emp_no", + ); + mysql().verified_stmt("SELECT DISTINCT STRAIGHT_JOIN emp_no FROM employees"); + + let select = mysql().verified_only_select("SELECT * FROM employees"); + assert!(select.select_modifiers.is_none()); +} + +#[test] +fn parse_select_modifiers() { + let select = mysql().verified_only_select("SELECT HIGH_PRIORITY * FROM employees"); + assert!(select.select_modifiers.as_ref().unwrap().high_priority); + assert!(!select.select_modifiers.unwrap().straight_join); + + let select = mysql().verified_only_select("SELECT SQL_SMALL_RESULT * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_small_result); + + let select = mysql().verified_only_select("SELECT SQL_BIG_RESULT * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_big_result); + + let select = mysql().verified_only_select("SELECT SQL_BUFFER_RESULT * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_buffer_result); + + let select = mysql().verified_only_select("SELECT SQL_NO_CACHE * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_no_cache); + + let select = mysql().verified_only_select("SELECT SQL_CALC_FOUND_ROWS * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_calc_found_rows); + + let 
select = mysql().verified_only_select( + "SELECT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT SQL_BIG_RESULT SQL_BUFFER_RESULT SQL_NO_CACHE SQL_CALC_FOUND_ROWS * FROM employees", + ); + assert!(select.select_modifiers.as_ref().unwrap().high_priority); + assert!(select.select_modifiers.as_ref().unwrap().straight_join); + assert!(select.select_modifiers.as_ref().unwrap().sql_small_result); + assert!(select.select_modifiers.as_ref().unwrap().sql_big_result); + assert!(select.select_modifiers.as_ref().unwrap().sql_buffer_result); + assert!(select.select_modifiers.as_ref().unwrap().sql_no_cache); + assert!(select.select_modifiers.unwrap().sql_calc_found_rows); + + mysql().verified_stmt("SELECT DISTINCT HIGH_PRIORITY emp_no FROM employees"); + mysql().verified_stmt("SELECT DISTINCT SQL_CALC_FOUND_ROWS emp_no FROM employees"); + mysql().verified_stmt("SELECT HIGH_PRIORITY STRAIGHT_JOIN e.emp_no, d.dept_no FROM employees e JOIN dept_emp d ON e.emp_no = d.emp_no"); +} + +#[test] +fn parse_select_modifiers_any_order() { + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS DISTINCT HIGH_PRIORITY * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY SQL_CALC_FOUND_ROWS * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCT SQL_SMALL_RESULT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY SQL_SMALL_RESULT * FROM employees", + ); + + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCTROW * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + + mysql().verified_stmt("SELECT ALL * FROM employees"); + mysql().verified_stmt("SELECT ALL HIGH_PRIORITY * FROM employees"); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY ALL * FROM employees", + "SELECT ALL HIGH_PRIORITY * FROM employees", + ); + + let select = 
mysql().verified_only_select_with_canonical( + "SELECT HIGH_PRIORITY DISTINCT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + assert!(select.select_modifiers.unwrap().high_priority); + assert!(matches!(select.distinct, Some(Distinct::Distinct))); + + let select = mysql().verified_only_select_with_canonical( + "SELECT SQL_CALC_FOUND_ROWS ALL HIGH_PRIORITY * FROM employees", + "SELECT ALL HIGH_PRIORITY SQL_CALC_FOUND_ROWS * FROM employees", + ); + assert!(select.select_modifiers.as_ref().unwrap().high_priority); + assert!(select.select_modifiers.unwrap().sql_calc_found_rows); + assert_eq!(select.distinct, Some(Distinct::All)) +} + +#[test] +fn parse_select_modifiers_can_be_repeated() { + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY HIGH_PRIORITY * FROM employees", + "SELECT HIGH_PRIORITY * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS SQL_CALC_FOUND_ROWS * FROM employees", + "SELECT SQL_CALC_FOUND_ROWS * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT STRAIGHT_JOIN STRAIGHT_JOIN * FROM employees", + "SELECT STRAIGHT_JOIN * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_NO_CACHE SQL_NO_CACHE * FROM employees", + "SELECT SQL_NO_CACHE * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCT HIGH_PRIORITY * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS DISTINCT SQL_CALC_FOUND_ROWS * FROM employees", + "SELECT DISTINCT SQL_CALC_FOUND_ROWS * FROM employees", + ); +} + +#[test] +fn parse_select_modifiers_canonical_ordering() { + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS SQL_NO_CACHE SQL_BUFFER_RESULT SQL_BIG_RESULT SQL_SMALL_RESULT STRAIGHT_JOIN HIGH_PRIORITY * FROM employees", + "SELECT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT SQL_BIG_RESULT SQL_BUFFER_RESULT SQL_NO_CACHE SQL_CALC_FOUND_ROWS * FROM 
employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_NO_CACHE DISTINCT SQL_CALC_FOUND_ROWS * FROM employees", + "SELECT DISTINCT SQL_NO_CACHE SQL_CALC_FOUND_ROWS * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY STRAIGHT_JOIN DISTINCT SQL_SMALL_RESULT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY ALL STRAIGHT_JOIN * FROM employees", + "SELECT ALL HIGH_PRIORITY STRAIGHT_JOIN * FROM employees", + ); +} + +#[test] +fn parse_select_modifiers_errors() { + assert!(mysql() + .parse_sql_statements("SELECT DISTINCT DISTINCT * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT DISTINCTROW DISTINCTROW * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT DISTINCT DISTINCTROW * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT ALL DISTINCT * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT DISTINCT ALL * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT ALL DISTINCTROW * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT ALL ALL * FROM t") + .is_err()); +} + #[test] fn mysql_foreign_key_with_index_name() { mysql().verified_stmt( @@ -4354,3 +4657,171 @@ fn test_create_index_options() { "CREATE INDEX idx_name ON t(c1, c2) USING BTREE LOCK = EXCLUSIVE ALGORITHM = DEFAULT", ); } + +#[test] +fn test_optimizer_hints() { + let mysql_dialect = mysql_and_generic(); + + // ~ selects + mysql_dialect.verified_stmt( + "\ + SELECT /*+ SET_VAR(optimizer_switch = 'mrr_cost_based=off') \ + SET_VAR(max_heap_table_size = 1G) */ 1", + ); + + mysql_dialect.verified_stmt( + "\ + SELECT /*+ SET_VAR(target_partitions=1) */ * FROM \ + (SELECT /*+ SET_VAR(target_partitions=8) */ * FROM t1 LIMIT 1) AS dt", + ); + + // ~ inserts / replace + mysql_dialect.verified_stmt( + "\ + INSERT /*+ RESOURCE_GROUP(Batch) 
*/ \ + INTO t2 VALUES (2)", + ); + + mysql_dialect.verified_stmt( + "\ + REPLACE /*+ foobar */ INTO test \ + VALUES (1, 'Old', '2014-08-20 18:47:00')", + ); + + // ~ updates + mysql_dialect.verified_stmt( + "\ + UPDATE /*+ quux */ table_name \ + SET column1 = 1 \ + WHERE 1 = 1", + ); + + // ~ deletes + mysql_dialect.verified_stmt( + "\ + DELETE /*+ foobar */ FROM table_name", + ); + + // prefixed hints: any alphanumeric prefix before `+` is captured + let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1"); + assert_eq!(select.optimizer_hints.len(), 1); + assert_eq!(select.optimizer_hints[0].prefix, "abc"); + assert_eq!(select.optimizer_hints[0].text, " text "); + + // multiple hints with different prefixes + let select = mysql_dialect.verified_only_select("SELECT /*+ A */ /*x2+ B */ 1"); + assert_eq!(select.optimizer_hints.len(), 2); + assert_eq!(select.optimizer_hints[0].prefix, ""); + assert_eq!(select.optimizer_hints[0].text, " A "); + assert_eq!(select.optimizer_hints[1].prefix, "x2"); + assert_eq!(select.optimizer_hints[1].text, " B "); + + // hints mixed with regular comments: regular comments are skipped + let select = mysql_dialect.verified_only_select_with_canonical( + "SELECT /*+ A */ /* Regular comment */ /*x2+ B */ 1", + "SELECT /*+ A */ /*x2+ B */ 1", + ); + assert_eq!(select.optimizer_hints.len(), 2); + assert_eq!(select.optimizer_hints[0].prefix, ""); + assert_eq!(select.optimizer_hints[0].text, " A "); + assert_eq!(select.optimizer_hints[1].prefix, "x2"); + assert_eq!(select.optimizer_hints[1].text, " B "); + + // prefixed hints in INSERT/UPDATE/DELETE + mysql_dialect.verified_stmt("INSERT /*abc+ append */ INTO t2 VALUES (2)"); + mysql_dialect.verified_stmt("UPDATE /*abc+ PARALLEL */ table_name SET column1 = 1"); + mysql_dialect.verified_stmt("DELETE /*abc+ ENABLE_DML */ FROM table_name"); +} + +#[test] +fn parse_create_database_with_charset() { + // Test DEFAULT CHARACTER SET without = sign (normalized form) +
mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4"); + + // Test DEFAULT CHARACTER SET with = sign + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test CHARACTER SET without DEFAULT + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test CHARSET shorthand + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb CHARSET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test DEFAULT CHARSET shorthand + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT CHARSET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test DEFAULT COLLATE + mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci"); + + // Test COLLATE without DEFAULT + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci", + "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // Test both CHARACTER SET and COLLATE together + mysql_and_generic().verified_stmt( + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // Test IF NOT EXISTS with CHARACTER SET + mysql_and_generic() + .verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER SET utf16"); + + // Test the exact syntax from the issue + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16", + "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16", + ); +} + +#[test] +fn parse_create_database_with_charset_errors() { + // Missing charset name after CHARACTER SET + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER
SET") + .is_err()); + + // Missing charset name after CHARSET + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb CHARSET") + .is_err()); + + // Missing collation name after COLLATE + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE") + .is_err()); + + // Equals sign but no value + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =") + .is_err()); +} + +#[test] +fn parse_create_database_with_charset_option_ordering() { + // MySQL allows COLLATE before CHARACTER SET - output is normalized to CHARACTER SET first + // (matches MySQL's own SHOW CREATE DATABASE output order) + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // COLLATE first without DEFAULT keywords + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); +} diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs new file mode 100644 index 0000000000..35f083111e --- /dev/null +++ b/tests/sqlparser_oracle.rs @@ -0,0 +1,544 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Test SQL syntax, specific to [sqlparser::dialect::OracleDialect]. + +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::{ + BinaryOperator, Expr, Ident, Insert, ObjectName, Query, QuoteDelimitedString, SetExpr, + Statement, TableAliasWithoutColumns, TableObject, Value, ValueWithSpan, + }, + dialect::OracleDialect, + parser::ParserError, + tokenizer::Span, +}; +use test_utils::{all_dialects_where, expr_from_projection, number, TestedDialects}; + +mod test_utils; + +fn oracle() -> TestedDialects { + TestedDialects::new(vec![Box::new(OracleDialect)]) +} + +/// Convenience constructor for [QuoteDelimitedString]. +fn quote_delimited_string( + start_quote: char, + value: &'static str, + end_quote: char, +) -> QuoteDelimitedString { + QuoteDelimitedString { + start_quote, + value: value.into(), + end_quote, + } +} + +/// Oracle: `||` has a lower precedence than `*` and `/` +#[test] +fn muldiv_have_higher_precedence_than_strconcat() { + // ............... A .. B ...... C .. D ...........
+ let sql = "SELECT 3 / 5 || 'asdf' || 7 * 9 FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + expr_from_projection(&select.projection[0]), + // (C || D) + &Expr::BinaryOp { + // (A || B) + left: Box::new(Expr::BinaryOp { + // A + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("3").into())), + op: BinaryOperator::Divide, + right: Box::new(Expr::Value(number("5").into())), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString("asdf".into()), + span: Span::empty(), + })), + }), + op: BinaryOperator::StringConcat, + // D + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("7").into())), + op: BinaryOperator::Multiply, + right: Box::new(Expr::Value(number("9").into())), + }), + } + ); +} + +/// Oracle: `+`, `-`, and `||` have the same precedence and parse from left-to-right +#[test] +fn plusminus_have_same_precedence_as_strconcat() { + // ................ A .. B .... C .. D ............ 
+ let sql = "SELECT 3 + 5 || '.3' || 7 - 9 FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + expr_from_projection(&select.projection[0]), + // D + &Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + // B + left: Box::new(Expr::BinaryOp { + // A + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("3").into())), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("5").into())), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString(".3".into()), + span: Span::empty(), + })), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(number("7").into())), + }), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("9").into())) + } + ); +} + +#[test] +fn parse_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT Q'.abc.', \ + Q'Xab'cX', \ + Q'|abc'''|', \ + Q'{abc}d}', \ + Q'[]abc[]', \ + Q'<a'bc>', \ + Q'<<<a'bc>', \ + Q'('abc'('abc)', \ + Q'(abc'def))', \ + Q'(abc'def)))' \ + FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(10, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('X', "ab'c", 'X'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('|', "abc'''", '|'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('{', "abc}d", '}'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[3]) +
); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('[', "]abc[", ']'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "a'bc", '>'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[5]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "<<a'bc", '>'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[6]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "'abc'('abc", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[7]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def)", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[8]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def))", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[9]) + ); +} + +#[test] +fn parse_invalid_quote_delimited_strings() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + // invalid quote delimiter + for q in [' ', '\t', '\r', '\n'] { + assert_eq!( + dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with quote char {q:?}" + ); + } + // invalid eof after quote + assert_eq!( + dialect.parse_sql_statements("SELECT Q'"), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with EOF quote char" + ); + // unterminated string + assert_eq!( + dialect.parse_sql_statements("SELECT Q'|asdfa...."), + Err(ParserError::TokenizerError( + "Unterminated string literal at Line: 1,
Column: 9".into() + )), + "with EOF quote char" + ); +} + +#[test] +fn parse_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "select q'!a'b'c!d!' from dual"; + let select = dialect.verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('!', "a'b'c!d", '!')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT q, quux, q.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "q")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "quux")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "q"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn parse_national_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT NQ'.abc.' 
FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_national_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + for prefix in ["nq", "Nq", "nQ", "NQ"] { + let select = dialect.verified_only_select_with_canonical( + &format!("select {prefix}'!a'b'c!d!' from dual"), + "SELECT NQ'!a'b'c!d!' FROM dual", + ); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string( + '!', "a'b'c!d", '!' + )) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + } +} + +#[test] +fn parse_national_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nq")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "nq"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn test_optimizer_hints() { + let oracle_dialect = oracle(); + + // selects: all `/*+...*/` comments are collected as hints + let select = oracle_dialect.verified_only_select_with_canonical( + "SELECT /*+one two three*/ /*+four five six*/ 1 FROM dual", + "SELECT /*+one two three*/ /*+four five six*/ 1 FROM dual", + ); + assert_eq!(select.optimizer_hints.len(), 
2); + assert_eq!(select.optimizer_hints[0].text, "one two three"); + assert_eq!(select.optimizer_hints[0].prefix, ""); + assert_eq!(select.optimizer_hints[1].text, "four five six"); + + // regular comments are skipped, hints after them are still collected + let select = oracle_dialect.verified_only_select_with_canonical( + "SELECT /*one two three*/ /*+four five six*/ 1 FROM dual", + "SELECT /*+four five six*/ 1 FROM dual", + ); + assert_eq!(select.optimizer_hints.len(), 1); + assert_eq!(select.optimizer_hints[0].text, "four five six"); + + let select = oracle_dialect.verified_only_select_with_canonical( + "SELECT --+ one two three /* asdf */\n 1 FROM dual", + "SELECT --+ one two three /* asdf */\n 1 FROM dual", + ); + assert_eq!(select.optimizer_hints.len(), 1); + assert_eq!( + select.optimizer_hints[0].text, + " one two three /* asdf */\n" + ); + assert_eq!(select.optimizer_hints[0].prefix, ""); + + // inserts + oracle_dialect.verified_stmt("INSERT /*+ append */ INTO t1 SELECT * FROM all_objects"); + + // updates + oracle_dialect.verified_stmt("UPDATE /*+ DISABLE_PARALLEL_DML */ table_name SET column1 = 1"); + + // deletes + oracle_dialect.verified_stmt("DELETE --+ ENABLE_PARALLEL_DML\n FROM table_name"); + + // merges + oracle_dialect.verified_stmt( + "MERGE /*+ CLUSTERING */ INTO people_target pt \ + USING people_source ps \ + ON (pt.person_id = ps.person_id) \ + WHEN NOT MATCHED THEN INSERT \ + (pt.person_id, pt.first_name, pt.last_name, pt.title) \ + VALUES (ps.person_id, ps.first_name, ps.last_name, ps.title)", + ); + + // single-line prefixed hint (Oracle supports `--` without trailing whitespace) + let select = oracle_dialect.verified_only_select_with_canonical( + "SELECT --abc+ text\n 1 FROM dual", + "SELECT --abc+ text\n 1 FROM dual", + ); + assert_eq!(select.optimizer_hints.len(), 1); + assert_eq!(select.optimizer_hints[0].prefix, "abc"); + assert_eq!(select.optimizer_hints[0].text, " text\n"); +} + +#[test] +fn test_connect_by() { + let oracle_dialect = 
oracle(); + + oracle_dialect.verified_only_select( + "SELECT last_name AS \"Employee\", CONNECT_BY_ISCYCLE AS \"Cycle\", \ + LEVEL, \ + SYS_CONNECT_BY_PATH(last_name, '/') AS \"Path\" \ + FROM employees \ + WHERE level <= 3 AND department_id = 80 \ + START WITH last_name = 'King' \ + CONNECT BY NOCYCLE PRIOR employee_id = manager_id AND LEVEL <= 4 \ + ORDER BY \"Employee\", \"Cycle\", LEVEL, \"Path\"", + ); + + // CONNECT_BY_ROOT + oracle_dialect.verified_only_select( + "SELECT last_name AS \"Employee\", CONNECT_BY_ROOT last_name AS \"Manager\", \ + LEVEL - 1 AS \"Pathlen\", SYS_CONNECT_BY_PATH(last_name, '/') AS \"Path\" \ + FROM employees \ + WHERE LEVEL > 1 AND department_id = 110 \ + CONNECT BY PRIOR employee_id = manager_id \ + ORDER BY \"Employee\", \"Manager\", \"Pathlen\", \"Path\"", + ); +} + +#[test] +fn test_insert_with_table_alias() { + let oracle_dialect = oracle(); + + fn verify_table_name_with_alias(stmt: &Statement, exp_table_name: &str, exp_table_alias: &str) { + assert!(matches!(stmt, + Statement::Insert(Insert { + table: TableObject::TableName(table_name), + table_alias: Some(TableAliasWithoutColumns { + explicit: false, + alias: Ident { + value: table_alias, + quote_style: None, + span: _ + } + }), + .. 
+ }) + if table_alias == exp_table_alias + && table_name == &ObjectName::from(vec![Ident { + value: exp_table_name.into(), + quote_style: None, + span: Span::empty(), + }]) + )); + } + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t t \ + SELECT 1, 2, 3 FROM dual", + ); + verify_table_name_with_alias(&stmt, "foo_t", "t"); + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t asdf (a, b, c) \ + SELECT 1, 2, 3 FROM dual", + ); + verify_table_name_with_alias(&stmt, "foo_t", "asdf"); + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t t (a, b, c) \ + VALUES (1, 2, 3)", + ); + verify_table_name_with_alias(&stmt, "foo_t", "t"); + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t t \ + VALUES (1, 2, 3)", + ); + verify_table_name_with_alias(&stmt, "foo_t", "t"); + + let stmt = + oracle_dialect.verified_stmt("INSERT INTO foo_t t (t.id, t.val) SELECT 1, 2 FROM dual"); + verify_table_name_with_alias(&stmt, "foo_t", "t"); + if let Statement::Insert(Insert { columns, .. }) = stmt { + assert_eq!( + vec![ + ObjectName::from(vec![Ident::new("t"), Ident::new("id")]), + ObjectName::from(vec![Ident::new("t"), Ident::new("val")]) + ], + columns + ); + } else { + panic!("not an insert statement"); + }; +} + +#[test] +fn test_insert_without_alias() { + let oracle_dialect = oracle(); + + // check DEFAULT + let sql = "INSERT INTO t default SELECT 'a' FROM dual"; + assert_eq!( + oracle_dialect.parse_sql_statements(sql), + Err(ParserError::ParserError( + "Expected: SELECT, VALUES, or a subquery in the query body, found: default".into() + )) + ); + + // check SELECT + let sql = "INSERT INTO t SELECT 'a' FROM dual"; + let stmt = oracle_dialect.verified_stmt(sql); + assert!(matches!( + &stmt, + Statement::Insert(Insert { + table_alias: None, + source: Some(source), + .. + }) + if matches!(&**source, Query { body, .. 
} if matches!(&**body, SetExpr::Select(_))))); + + // check WITH + let sql = "INSERT INTO dual WITH w AS (SELECT 1 AS y FROM dual) SELECT y FROM w"; + let stmt = oracle_dialect.verified_stmt(sql); + assert!(matches!( + &stmt, + Statement::Insert(Insert { + table_alias: None, + source: Some(source), + .. + }) + if matches!(&**source, Query { body, .. } if matches!(&**body, SetExpr::Select(_))))); + + // check VALUES + let sql = "INSERT INTO t VALUES (1)"; + let stmt = oracle_dialect.verified_stmt(sql); + assert!(matches!( + stmt, + Statement::Insert(Insert { + table_alias: None, + source: Some(source), + .. + }) + if matches!(&*source, Query { body, .. } if matches!(&**body, SetExpr::Values(_))) + )); +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 96e0414571..af0f2be334 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -23,15 +23,11 @@ mod test_utils; use helpers::attached_token::AttachedToken; -use sqlparser::ast::{ - DataType, DropBehavior, DropOperator, DropOperatorClass, DropOperatorSignature, -}; -use sqlparser::tokenizer::Span; -use test_utils::*; - use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, PostgreSqlDialect}; use sqlparser::parser::ParserError; +use sqlparser::tokenizer::Span; +use test_utils::*; #[test] fn parse_create_table_generated_always_as_identity() { @@ -516,6 +512,13 @@ fn parse_create_table_with_defaults() { } } +#[test] +fn parse_cast_in_default_expr() { + pg().verified_stmt("CREATE TABLE t (c TEXT DEFAULT (foo())::TEXT)"); + pg().verified_stmt("CREATE TABLE t (c TEXT DEFAULT (foo())::INT::TEXT)"); + pg().verified_stmt("CREATE TABLE t (c TEXT DEFAULT (foo())::TEXT NOT NULL)"); +} + #[test] fn parse_create_table_from_pg_dump() { let sql = "CREATE TABLE public.customer ( @@ -590,6 +593,45 @@ fn parse_create_table_constraints_only() { }; } +#[test] +fn parse_create_table_like_with_defaults() { + let sql = "CREATE TABLE new (LIKE old INCLUDING DEFAULTS)"; + match 
pg().verified_stmt(sql) { + Statement::CreateTable(stmt) => { + assert_eq!( + stmt.name, + ObjectName::from(vec![Ident::new("new".to_string())]) + ); + assert_eq!( + stmt.like, + Some(CreateTableLikeKind::Parenthesized(CreateTableLike { + name: ObjectName::from(vec![Ident::new("old".to_string())]), + defaults: Some(CreateTableLikeDefaults::Including), + })) + ) + } + _ => unreachable!(), + } + + let sql = "CREATE TABLE new (LIKE old EXCLUDING DEFAULTS)"; + match pg().verified_stmt(sql) { + Statement::CreateTable(stmt) => { + assert_eq!( + stmt.name, + ObjectName::from(vec![Ident::new("new".to_string())]) + ); + assert_eq!( + stmt.like, + Some(CreateTableLikeKind::Parenthesized(CreateTableLike { + name: ObjectName::from(vec![Ident::new("old".to_string())]), + defaults: Some(CreateTableLikeDefaults::Excluding), + })) + ) + } + _ => unreachable!(), + } +} + #[test] fn parse_alter_table_constraints_rename() { match alter_table_op( @@ -624,6 +666,45 @@ fn parse_alter_table_constraints_unique_nulls_distinct() { pg_and_generic().verified_stmt("ALTER TABLE t ADD CONSTRAINT b UNIQUE (c)"); } +#[test] +fn parse_alter_table_constraint_using_index() { + // PRIMARY KEY USING INDEX + // https://www.postgresql.org/docs/current/sql-altertable.html + let sql = "ALTER TABLE tab ADD CONSTRAINT c PRIMARY KEY USING INDEX my_index"; + match pg_and_generic().verified_stmt(sql) { + Statement::AlterTable(alter_table) => match &alter_table.operations[0] { + AlterTableOperation::AddConstraint { + constraint: TableConstraint::PrimaryKeyUsingIndex(c), + .. 
+ } => { + assert_eq!(c.name.as_ref().unwrap().to_string(), "c"); + assert_eq!(c.index_name.to_string(), "my_index"); + assert!(c.characteristics.is_none()); + } + _ => unreachable!(), + }, + _ => unreachable!(), + } + + // UNIQUE USING INDEX + pg_and_generic().verified_stmt("ALTER TABLE tab ADD CONSTRAINT c UNIQUE USING INDEX my_index"); + + // Without constraint name + pg_and_generic().verified_stmt("ALTER TABLE tab ADD PRIMARY KEY USING INDEX my_index"); + pg_and_generic().verified_stmt("ALTER TABLE tab ADD UNIQUE USING INDEX my_index"); + + // With DEFERRABLE + pg_and_generic().verified_stmt( + "ALTER TABLE tab ADD CONSTRAINT c PRIMARY KEY USING INDEX my_index DEFERRABLE", + ); + pg_and_generic().verified_stmt( + "ALTER TABLE tab ADD CONSTRAINT c UNIQUE USING INDEX my_index NOT DEFERRABLE INITIALLY IMMEDIATE", + ); + pg_and_generic().verified_stmt( + "ALTER TABLE tab ADD CONSTRAINT c PRIMARY KEY USING INDEX my_index DEFERRABLE INITIALLY DEFERRED", + ); +} + #[test] fn parse_alter_table_disable() { pg_and_generic().verified_stmt("ALTER TABLE tab DISABLE ROW LEVEL SECURITY"); @@ -644,6 +725,8 @@ fn parse_alter_table_enable() { pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE REPLICA TRIGGER trigger_name"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE REPLICA RULE rule_name"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE ROW LEVEL SECURITY"); + pg_and_generic().verified_stmt("ALTER TABLE tab FORCE ROW LEVEL SECURITY"); + pg_and_generic().verified_stmt("ALTER TABLE tab NO FORCE ROW LEVEL SECURITY"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE RULE rule_name"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE TRIGGER ALL"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE TRIGGER USER"); @@ -1040,6 +1123,62 @@ PHP ₱ USD $ pg_and_generic().one_statement_parses_to(sql, ""); } +#[test] +fn parse_copy_from_stdin_without_semicolon() { + let stmt = pg().verified_stmt("COPY bitwise_test FROM STDIN NULL 'null'"); + 
assert_eq!( + stmt, + Statement::Copy { + source: CopySource::Table { + table_name: ObjectName::from(vec!["bitwise_test".into()]), + columns: vec![], + }, + to: false, + target: CopyTarget::Stdin, + options: vec![], + legacy_options: vec![CopyLegacyOption::Null("null".into())], + values: vec![], + } + ); +} + +#[test] +fn parse_copy_from_stdin_without_semicolon_variants() { + // This covers additional COPY ... FROM STDIN shapes without inline payload. + // `parse_copy_from_stdin_without_semicolon` asserts the legacy NULL option details. + let cases = [ + "COPY varbit_table FROM STDIN", + "COPY bit_table FROM STDIN", + "COPY copytest2 (test) FROM STDIN", + "COPY copytest3 FROM STDIN CSV HEADER", + "COPY copytest4 FROM STDIN (HEADER)", + "COPY parted_copytest FROM STDIN", + "COPY tab_progress_reporting FROM STDIN", + "COPY oversized_column_default FROM STDIN", + "COPY x (a, b, c, d, e) FROM STDIN", + "COPY header_copytest (c, a) FROM STDIN", + "COPY atest5 (two) FROM STDIN", + "COPY main_table (a, b) FROM STDIN", + ]; + + for sql in cases { + match pg().verified_stmt(sql) { + Statement::Copy { + to: false, + target: CopyTarget::Stdin, + values, + .. + } => { + assert!( + values.is_empty(), + "expected no inline COPY payload for `{sql}`" + ); + } + _ => panic!("expected COPY ... 
FROM STDIN statement for `{sql}`"), + } + } +} + #[test] fn test_copy_from() { let stmt = pg().verified_stmt("COPY users FROM 'data.csv'"); @@ -1286,7 +1425,9 @@ fn parse_copy_to() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -1324,7 +1465,7 @@ fn parse_copy_to() { sort_by: vec![], qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1710,6 +1851,7 @@ fn parse_execute() { (Value::Number("1337".parse().unwrap(), false)).with_empty_span() )), data_type: DataType::SmallInt(None), + array: false, format: None }, alias: None @@ -1721,6 +1863,7 @@ fn parse_execute() { (Value::Number("7331".parse().unwrap(), false)).with_empty_span() )), data_type: DataType::SmallInt(None), + array: false, format: None }, alias: None @@ -2347,6 +2490,7 @@ fn parse_array_index_expr() { ))), None )), + array: false, format: None, }))), access_chain: vec![ @@ -2572,11 +2716,17 @@ fn parse_create_indices_with_operator_classes() { IndexType::SPGiST, IndexType::Custom("CustomIndexType".into()), ]; - let operator_classes: [Option; 4] = [ + let operator_classes: [Option; 4] = [ None, - Some("gin_trgm_ops".into()), - Some("gist_trgm_ops".into()), - Some("totally_not_valid".into()), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "gin_trgm_ops", + ))])), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "gist_trgm_ops", + ))])), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "totally_not_valid", + ))])), ]; for expected_index_type in indices { @@ -2713,6 +2863,36 @@ fn parse_create_indices_with_operator_classes() { } } +#[test] +fn parse_create_index_with_schema_qualified_operator_class() { + let sql = "CREATE INDEX my_index ON my_table USING HNSW (embedding public.vector_cosine_ops)"; 
+ + match pg().verified_stmt(sql) { + Statement::CreateIndex(CreateIndex { columns, .. }) => { + assert_eq!(1, columns.len()); + let idx_col = &columns[0]; + + // Verify the column name + match &idx_col.column.expr { + Expr::Identifier(ident) => { + assert_eq!("embedding", ident.value); + } + _ => panic!("Expected identifier expression"), + } + + // Verify the schema-qualified operator class + assert_eq!( + Some(ObjectName(vec![ + ObjectNamePart::Identifier(Ident::new("public")), + ObjectNamePart::Identifier(Ident::new("vector_cosine_ops")), + ])), + idx_col.operator_class + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_bloom() { let sql = @@ -3027,7 +3207,9 @@ fn parse_array_subquery_expr() { set_quantifier: SetQuantifier::None, left: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Value( @@ -3048,12 +3230,14 @@ fn parse_array_subquery_expr() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), right: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hints: vec![], distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Value( @@ -3074,7 +3258,7 @@ fn parse_array_subquery_expr() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), }), @@ -3103,7 +3287,7 @@ fn test_transaction_statement() { statement, Statement::Set(Set::SetTransaction { modes: vec![], - snapshot: Some(Value::SingleQuotedString(String::from("000003A1-1"))), + snapshot: Some(Value::SingleQuotedString(String::from("000003A1-1")).with_empty_span()), session: false }) ); @@ -4257,11 +4441,13 @@ 
$$"#; DataType::Varchar(None), ), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF str1 <> str2 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4298,11 +4484,13 @@ $$"#; DataType::Int(None) ) ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF int1 <> 0 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4343,11 +4531,13 @@ $$"#; DataType::Int(None) ), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF a <> b THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4388,11 +4578,13 @@ $$"#; DataType::Int(None) ), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { 
body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF int1 <> int2 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4426,11 +4618,13 @@ $$"#; ), OperateFunctionArg::with_name("b", DataType::Varchar(None)), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString { @@ -4467,11 +4661,13 @@ fn parse_create_function() { OperateFunctionArg::unnamed(DataType::Integer(None)), OperateFunctionArg::unnamed(DataType::Integer(None)), ]), - return_type: Some(DataType::Integer(None)), + return_type: Some(FunctionReturnType::DataType(DataType::Integer(None))), language: Some("SQL".into()), behavior: Some(FunctionBehavior::Immutable), called_on_null: Some(FunctionCalledOnNull::Strict), parallel: Some(FunctionParallel::Safe), + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::SingleQuotedString("select $1 + $2;".into())).with_empty_span() @@ -4502,6 +4698,85 @@ fn parse_create_function_detailed() { ); } +#[test] +fn parse_create_function_returns_setof() { + pg_and_generic().verified_stmt( + "CREATE FUNCTION get_users() RETURNS SETOF TEXT LANGUAGE sql AS 'SELECT name FROM users'", + ); + pg_and_generic().verified_stmt( + "CREATE FUNCTION get_ids() RETURNS SETOF INTEGER LANGUAGE sql AS 'SELECT id FROM users'", + ); + pg_and_generic().verified_stmt( + r#"CREATE FUNCTION get_all() RETURNS SETOF my_schema."MyType" LANGUAGE sql AS 'SELECT * FROM t'"#, + ); + pg_and_generic().verified_stmt( + "CREATE FUNCTION get_rows() RETURNS SETOF RECORD LANGUAGE sql AS 'SELECT * FROM t'", + ); + + let sql = "CREATE FUNCTION get_names() 
RETURNS SETOF TEXT LANGUAGE sql AS 'SELECT name FROM t'"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateFunction(CreateFunction { return_type, .. }) => { + assert_eq!(return_type, Some(FunctionReturnType::SetOf(DataType::Text))); + } + _ => panic!("Expected CreateFunction"), + } +} + +#[test] +fn parse_create_function_with_security() { + let sql = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SECURITY DEFINER AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateFunction(CreateFunction { security, .. }) => { + assert_eq!(security, Some(FunctionSecurity::Definer)); + } + _ => panic!("Expected CreateFunction"), + } + + let sql2 = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SECURITY INVOKER AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql2) { + Statement::CreateFunction(CreateFunction { security, .. }) => { + assert_eq!(security, Some(FunctionSecurity::Invoker)); + } + _ => panic!("Expected CreateFunction"), + } +} + +#[test] +fn parse_create_function_with_set_params() { + let sql = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SET search_path = auth, pg_temp, public AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateFunction(CreateFunction { set_params, .. }) => { + assert_eq!(set_params.len(), 1); + assert_eq!(set_params[0].name.to_string(), "search_path"); + } + _ => panic!("Expected CreateFunction"), + } + + // Test multiple SET params + let sql2 = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SET search_path = public SET statement_timeout = '5s' AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql2) { + Statement::CreateFunction(CreateFunction { set_params, .. 
}) => { + assert_eq!(set_params.len(), 2); + } + _ => panic!("Expected CreateFunction"), + } + + // Test FROM CURRENT + let sql3 = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SET search_path FROM CURRENT AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql3) { + Statement::CreateFunction(CreateFunction { set_params, .. }) => { + assert_eq!(set_params.len(), 1); + assert!(matches!(set_params[0].value, FunctionSetValue::FromCurrent)); + } + _ => panic!("Expected CreateFunction"), + } +} + #[test] fn parse_incorrect_create_function_parallel() { let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL PARALLEL BLAH AS 'select $1 + $2;'"; @@ -4522,14 +4797,16 @@ fn parse_create_function_c_with_module_pathname() { "input", DataType::Custom(ObjectName::from(vec![Ident::new("cstring")]), vec![]), ),]), - return_type: Some(DataType::Custom( + return_type: Some(FunctionReturnType::DataType(DataType::Custom( ObjectName::from(vec![Ident::new("cas")]), vec![] - )), + ))), language: Some("c".into()), behavior: Some(FunctionBehavior::Immutable), called_on_null: None, parallel: Some(FunctionParallel::Safe), + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::SingleQuotedString("MODULE_PATHNAME".into())).with_empty_span() @@ -4982,12 +5259,14 @@ fn parse_truncate() { let table_names = vec![TruncateTableTarget { name: table_name.clone(), only: false, + has_asterisk: false, }]; assert_eq!( Statement::Truncate(Truncate { table_names, partitions: None, table: false, + if_exists: false, identity: None, cascade: None, on_cluster: None, @@ -5005,6 +5284,7 @@ fn parse_truncate_with_options() { let table_names = vec![TruncateTableTarget { name: table_name.clone(), only: true, + has_asterisk: false, }]; assert_eq!( @@ -5012,6 +5292,7 @@ fn parse_truncate_with_options() { table_names, partitions: None, table: true, + if_exists: false, identity: 
Some(TruncateIdentityOption::Restart), cascade: Some(CascadeOption::Cascade), on_cluster: None, @@ -5033,10 +5314,12 @@ fn parse_truncate_with_table_list() { TruncateTableTarget { name: table_name_a.clone(), only: false, + has_asterisk: false, }, TruncateTableTarget { name: table_name_b.clone(), only: false, + has_asterisk: false, }, ]; @@ -5045,6 +5328,7 @@ fn parse_truncate_with_table_list() { table_names, partitions: None, table: true, + if_exists: false, identity: Some(TruncateIdentityOption::Restart), cascade: Some(CascadeOption::Cascade), on_cluster: None, @@ -5053,6 +5337,64 @@ fn parse_truncate_with_table_list() { ); } +#[test] +fn parse_truncate_with_descendant() { + let truncate = pg_and_generic().verified_stmt("TRUNCATE TABLE t *"); + + let table_names = vec![TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("t")]), + only: false, + has_asterisk: true, + }]; + + assert_eq!( + Statement::Truncate(Truncate { + table_names, + partitions: None, + table: true, + if_exists: false, + identity: None, + cascade: None, + on_cluster: None, + }), + truncate + ); + + let truncate = pg_and_generic() + .verified_stmt("TRUNCATE TABLE ONLY parent, child *, grandchild RESTART IDENTITY"); + + let table_names = vec![ + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("parent")]), + only: true, + has_asterisk: false, + }, + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("child")]), + only: false, + has_asterisk: true, + }, + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("grandchild")]), + only: false, + has_asterisk: false, + }, + ]; + + assert_eq!( + Statement::Truncate(Truncate { + table_names, + partitions: None, + table: true, + if_exists: false, + identity: Some(TruncateIdentityOption::Restart), + cascade: None, + on_cluster: None, + }), + truncate + ); +} + #[test] fn parse_select_regexp_as_column_name() { pg_and_generic().verified_only_select( @@ -5213,6 +5555,7 @@ fn test_simple_postgres_insert_with_alias() 
{ statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), + optimizer_hints: vec![], or: None, ignore: false, into: true, @@ -5221,22 +5564,25 @@ fn test_simple_postgres_insert_with_alias() { quote_style: None, span: Span::empty(), }])), - table_alias: Some(Ident { - value: "test_table".to_string(), - quote_style: None, - span: Span::empty(), + table_alias: Some(TableAliasWithoutColumns { + explicit: true, + alias: Ident { + value: "test_table".to_string(), + quote_style: None, + span: Span::empty(), + } }), columns: vec![ - Ident { + ObjectName::from(Ident { value: "id".to_string(), quote_style: None, span: Span::empty(), - }, - Ident { + }), + ObjectName::from(Ident { value: "a".to_string(), quote_style: None, span: Span::empty(), - } + }) ], overwrite: false, source: Some(Box::new(Query { @@ -5264,11 +5610,16 @@ fn test_simple_postgres_insert_with_alias() { has_table_keyword: false, on: None, returning: None, + output: None, replace_into: false, priority: None, insert_alias: None, settings: None, format_clause: None, + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } @@ -5284,6 +5635,7 @@ fn test_simple_postgres_insert_with_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), + optimizer_hints: vec![], or: None, ignore: false, into: true, @@ -5292,22 +5644,25 @@ fn test_simple_postgres_insert_with_alias() { quote_style: None, span: Span::empty(), }])), - table_alias: Some(Ident { - value: "test_table".to_string(), - quote_style: None, - span: Span::empty(), + table_alias: Some(TableAliasWithoutColumns { + explicit: true, + alias: Ident { + value: "test_table".to_string(), + quote_style: None, + span: Span::empty(), + } }), columns: vec![ - Ident { + ObjectName::from(Ident { value: "id".to_string(), quote_style: None, span: Span::empty(), - }, - Ident { + }), + ObjectName::from(Ident { value: "a".to_string(), 
quote_style: None, span: Span::empty(), - } + }) ], overwrite: false, source: Some(Box::new(Query { @@ -5338,11 +5693,16 @@ fn test_simple_postgres_insert_with_alias() { has_table_keyword: false, on: None, returning: None, + output: None, replace_into: false, priority: None, insert_alias: None, settings: None, format_clause: None, + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } @@ -5357,6 +5717,7 @@ fn test_simple_insert_with_quoted_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), + optimizer_hints: vec![], or: None, ignore: false, into: true, @@ -5365,22 +5726,25 @@ fn test_simple_insert_with_quoted_alias() { quote_style: None, span: Span::empty(), }])), - table_alias: Some(Ident { - value: "Test_Table".to_string(), - quote_style: Some('"'), - span: Span::empty(), + table_alias: Some(TableAliasWithoutColumns { + explicit: true, + alias: Ident { + value: "Test_Table".to_string(), + quote_style: Some('"'), + span: Span::empty(), + } }), columns: vec![ - Ident { + ObjectName::from(Ident { value: "id".to_string(), quote_style: None, span: Span::empty(), - }, - Ident { + }), + ObjectName::from(Ident { value: "a".to_string(), quote_style: None, span: Span::empty(), - } + }) ], overwrite: false, source: Some(Box::new(Query { @@ -5410,11 +5774,16 @@ fn test_simple_insert_with_quoted_alias() { has_table_keyword: false, on: None, returning: None, + output: None, replace_into: false, priority: None, insert_alias: None, settings: None, format_clause: None, + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } @@ -5469,6 +5838,7 @@ fn parse_at_time_zone() { Value::SingleQuotedString("America/Los_Angeles".to_owned()).with_empty_span(), )), data_type: DataType::Text, + array: false, format: None, }), }), @@ -5529,6 +5899,12 @@ fn parse_interval_data_type() { 
} } +#[test] +fn parse_interval_keyword_as_unquoted_identifier() { + pg().verified_stmt("SELECT MAX(interval) FROM tbl"); + pg().verified_expr("INTERVAL '1 day'"); +} + #[test] fn parse_create_table_with_options() { let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; @@ -6057,6 +6433,7 @@ fn parse_trigger_related_functions() { transient: false, volatile: false, iceberg: false, + snapshot: false, name: ObjectName::from(vec![Ident::new("emp")]), columns: vec![ ColumnDef { @@ -6098,6 +6475,8 @@ fn parse_trigger_related_functions() { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -6107,6 +6486,7 @@ fn parse_trigger_related_functions() { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: None, base_location: None, external_volume: None, @@ -6120,6 +6500,10 @@ fn parse_trigger_related_functions() { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, + sortkey: None, + backup: None, } ); @@ -6134,7 +6518,7 @@ fn parse_trigger_related_functions() { if_not_exists: false, name: ObjectName::from(vec![Ident::new("emp_stamp")]), args: Some(vec![]), - return_type: Some(DataType::Trigger), + return_type: Some(FunctionReturnType::DataType(DataType::Trigger)), function_body: Some( CreateFunctionBody::AsBeforeOptions { body: Expr::Value(( @@ -6153,6 +6537,8 @@ fn parse_trigger_related_functions() { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], using: None, language: Some(Ident::new("plpgsql")), determinism_specifier: None, @@ -6281,6 +6667,7 @@ fn arrow_cast_precedence() { (Value::SingleQuotedString("bar".to_string())).with_empty_span() )), data_type: DataType::Text, + array: false, format: None, }), } @@ -6460,6 +6847,30 @@ fn parse_alter_table_replica_identity() { } _ => 
unreachable!(), } + + match pg_and_generic().verified_stmt("ALTER TABLE foo REPLICA IDENTITY NOTHING") { + Statement::AlterTable(AlterTable { operations, .. }) => { + assert_eq!( + operations, + vec![AlterTableOperation::ReplicaIdentity { + identity: ReplicaIdentity::Nothing + }] + ); + } + _ => unreachable!(), + } + + match pg_and_generic().verified_stmt("ALTER TABLE foo REPLICA IDENTITY DEFAULT") { + Statement::AlterTable(AlterTable { operations, .. }) => { + assert_eq!( + operations, + vec![AlterTableOperation::ReplicaIdentity { + identity: ReplicaIdentity::Default + }] + ); + } + _ => unreachable!(), + } } #[test] @@ -6715,24 +7126,26 @@ fn parse_create_operator() { length: 255, unit: None }))), - commutator: Some(ObjectName::from(vec![ - Ident::new("schema"), - Ident::new(">") - ])), - negator: Some(ObjectName::from(vec![ - Ident::new("schema"), - Ident::new("<=") - ])), - restrict: Some(ObjectName::from(vec![ - Ident::new("myschema"), - Ident::new("sel_func") - ])), - join: Some(ObjectName::from(vec![ - Ident::new("myschema"), - Ident::new("join_func") - ])), - hashes: true, - merges: true, + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new(">") + ])), + OperatorOption::Negator(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new("<=") + ])), + OperatorOption::Restrict(Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("sel_func") + ]))), + OperatorOption::Join(Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("join_func") + ]))), + OperatorOption::Hashes, + OperatorOption::Merges, + ], }) ); @@ -6748,12 +7161,7 @@ fn parse_create_operator() { is_procedure: false, left_arg: None, right_arg: None, - commutator: None, - negator: None, - restrict: None, - join: None, - hashes: false, - merges: false, + options: vec![], }) ); } @@ -6778,13 +7186,9 @@ fn parse_create_operator() { ), ] { match pg().verified_stmt(&format!("CREATE OPERATOR {name} (FUNCTION = f)")) { - 
Statement::CreateOperator(CreateOperator { - name, - hashes: false, - merges: false, - .. - }) => { + Statement::CreateOperator(CreateOperator { name, options, .. }) => { assert_eq!(name, expected_name); + assert!(options.is_empty()); } _ => unreachable!(), } @@ -6921,21 +7325,757 @@ fn parse_drop_operator() { } #[test] -fn parse_drop_operator_family() { - for if_exists in [true, false] { - for drop_behavior in [ - None, - Some(DropBehavior::Cascade), - Some(DropBehavior::Restrict), +fn parse_alter_operator() { + use sqlparser::ast::{AlterOperator, AlterOperatorOperation, OperatorOption, Owner}; + + // Test ALTER OPERATOR ... OWNER TO with different owner types + for (owner_sql, owner_ast) in [ + ("joe", Owner::Ident(Ident::new("joe"))), + ("CURRENT_USER", Owner::CurrentUser), + ("CURRENT_ROLE", Owner::CurrentRole), + ("SESSION_USER", Owner::SessionUser), + ] { + for (op_name, op_name_ast, left_type_sql, left_type_ast, right_type_sql, right_type_ast) in [ + ( + "+", + ObjectName::from(vec![Ident::new("+")]), + "INTEGER", + Some(DataType::Integer(None)), + "INTEGER", + DataType::Integer(None), + ), + ( + "~", + ObjectName::from(vec![Ident::new("~")]), + "NONE", + None, + "BIT", + DataType::Bit(None), + ), + ( + "@@", + ObjectName::from(vec![Ident::new("@@")]), + "TEXT", + Some(DataType::Text), + "TEXT", + DataType::Text, + ), ] { - for index_method in &["btree", "hash", "gist", "gin", "spgist", "brin"] { - for (names_str, names_vec) in [ - ( - "float_ops", - vec![ObjectName::from(vec![Ident::new("float_ops")])], - ), - ( - "myschema.custom_ops", + let sql = format!( + "ALTER OPERATOR {} ({}, {}) OWNER TO {}", + op_name, left_type_sql, right_type_sql, owner_sql + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: op_name_ast.clone(), + left_type: left_type_ast.clone(), + right_type: right_type_ast.clone(), + operation: AlterOperatorOperation::OwnerTo(owner_ast.clone()), + }) + ); + } + } + + // Test ALTER 
OPERATOR ... SET SCHEMA + for (op_name, op_name_ast, schema_name, schema_name_ast) in [ + ( + "+", + ObjectName::from(vec![Ident::new("+")]), + "new_schema", + ObjectName::from(vec![Ident::new("new_schema")]), + ), + ( + "myschema.@@", + ObjectName::from(vec![Ident::new("myschema"), Ident::new("@@")]), + "other_schema", + ObjectName::from(vec![Ident::new("other_schema")]), + ), + ] { + let sql = format!( + "ALTER OPERATOR {} (TEXT, TEXT) SET SCHEMA {}", + op_name, schema_name + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: op_name_ast, + left_type: Some(DataType::Text), + right_type: DataType::Text, + operation: AlterOperatorOperation::SetSchema { + schema_name: schema_name_ast, + }, + }) + ); + } + + // Test ALTER OPERATOR ... SET with RESTRICT and JOIN + for (restrict_val, restrict_ast, join_val, join_ast) in [ + ( + "_int_contsel", + Some(ObjectName::from(vec![Ident::new("_int_contsel")])), + "_int_contjoinsel", + Some(ObjectName::from(vec![Ident::new("_int_contjoinsel")])), + ), + ( + "NONE", + None, + "my_joinsel", + Some(ObjectName::from(vec![Ident::new("my_joinsel")])), + ), + ( + "my_sel", + Some(ObjectName::from(vec![Ident::new("my_sel")])), + "NONE", + None, + ), + ] { + let sql = format!( + "ALTER OPERATOR && (TEXT, TEXT) SET (RESTRICT = {}, JOIN = {})", + restrict_val, join_val + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new("&&")]), + left_type: Some(DataType::Text), + right_type: DataType::Text, + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Restrict(restrict_ast), + OperatorOption::Join(join_ast), + ], + }, + }) + ); + } + + // Test ALTER OPERATOR ... 
SET with COMMUTATOR and NEGATOR + for (operator, commutator, negator) in [("&&", "&&", ">"), ("+", "+", "-"), ("<", "<", ">=")] { + let sql = format!( + "ALTER OPERATOR {} (INTEGER, INTEGER) SET (COMMUTATOR = {}, NEGATOR = {})", + operator, commutator, negator + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new(operator)]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![Ident::new(commutator)])), + OperatorOption::Negator(ObjectName::from(vec![Ident::new(negator)])), + ], + }, + }) + ); + } + + // Test ALTER OPERATOR ... SET with HASHES and MERGES (individually and combined) + for (operator, options_sql, options_ast) in [ + ("=", "HASHES", vec![OperatorOption::Hashes]), + ("<", "MERGES", vec![OperatorOption::Merges]), + ( + "<=", + "HASHES, MERGES", + vec![OperatorOption::Hashes, OperatorOption::Merges], + ), + ] { + let sql = format!( + "ALTER OPERATOR {} (INTEGER, INTEGER) SET ({})", + operator, options_sql + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new(operator)]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: options_ast + }, + }) + ); + } + + // Test ALTER OPERATOR ... 
SET with multiple options combined + let sql = + "ALTER OPERATOR + (INTEGER, INTEGER) SET (COMMUTATOR = +, NEGATOR = -, HASHES, MERGES)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new("+")]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![Ident::new("+")])), + OperatorOption::Negator(ObjectName::from(vec![Ident::new("-")])), + OperatorOption::Hashes, + OperatorOption::Merges, + ], + }, + }) + ); +} + +#[test] +fn parse_alter_operator_family() { + // Test ALTER OPERATOR FAMILY ... ADD OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD OPERATOR 1 < (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("<")]), + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + purpose: None, + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR SEARCH + let sql = + "ALTER OPERATOR FAMILY text_ops USING btree ADD OPERATOR 1 @@ (TEXT, TEXT) FOR SEARCH"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("text_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("@@")]), + op_types: vec![DataType::Text, DataType::Text], + purpose: Some(OperatorPurpose::ForSearch), + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
ADD FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD FUNCTION 1 btint42cmp(INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Function { + support_number: 1, + op_types: None, + function_name: ObjectName::from(vec![Ident::new("btint42cmp")]), + argument_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... DROP OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP OPERATOR 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Operator { + strategy_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... DROP FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP FUNCTION 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Function { + support_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
RENAME TO + let sql = "ALTER OPERATOR FAMILY old_ops USING btree RENAME TO new_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("old_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("new_ops")]), + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... OWNER TO + let sql = "ALTER OPERATOR FAMILY my_ops USING btree OWNER TO joe"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::OwnerTo(Owner::Ident(Ident::new("joe"))), + }) + ); + + // Test ALTER OPERATOR FAMILY ... SET SCHEMA + let sql = "ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA new_schema"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::SetSchema { + schema_name: ObjectName::from(vec![Ident::new("new_schema")]), + }, + }) + ); + + // Test error cases + // Missing USING clause + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops ADD OPERATOR 1 < (INT4, INT2)") + .is_err()); + + // Invalid operation + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree INVALID_OPERATION") + .is_err()); + + // Missing operator name in ADD OPERATOR + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Missing parentheses in DROP OPERATOR + assert!(pg() + 
.parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 INT4, INT2") + .is_err()); + + // Invalid operator name (empty) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid operator name (special characters) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 @#$ (INT4, INT2)" + ) + .is_err()); + + // Negative strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR -1 < (INT4, INT2)" + ) + .is_err()); + + // Non-integer strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1.5 < (INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2" + ) + .is_err()); + + // Missing opening parenthesis in operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < INT4, INT2)" + ) + .is_err()); + + // Empty operator types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < ()") + .is_err()); + + // Invalid data type (using punctuation) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (@#$%, INT2)" + ) + .is_err()); + + // Incomplete FOR clause + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR" + ) + .is_err()); + + // Invalid FOR clause keyword + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR INVALID" + ) + .is_err()); + + // FOR ORDER BY without sort family + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER 
BY" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid function name + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 123invalid(INT4, INT2)" + ) + .is_err()); + + // Negative support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION -1 func(INT4, INT2)" + ) + .is_err()); + + // Non-integer support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1.5 func(INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in function operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2 func()" + ) + .is_err()); + + // Missing closing parenthesis in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(INT4, INT2" + ) + .is_err()); + + // Invalid data type in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(@#$%, INT2)" + ) + .is_err()); + + // DROP OPERATOR with FOR clause (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2) FOR SEARCH" + ) + .is_err()); + + // DROP FUNCTION with function arguments (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 (INT4, INT2) func(INT4)" + ) + .is_err()); + + // Multiple ADD items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // Multiple DROP items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree 
DROP OPERATOR 1 (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // RENAME TO with invalid new name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree RENAME TO 123invalid") + .is_err()); + + // OWNER TO with invalid owner + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree OWNER TO 123invalid") + .is_err()); + + // SET SCHEMA with invalid schema name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA 123invalid") + .is_err()); + + // Schema-qualified operator family name with invalid schema + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY 123invalid.my_ops USING btree ADD OPERATOR 1 < (INT4, INT2)" + ) + .is_err()); + + // Missing operator family name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY USING btree ADD OPERATOR 1 < (INT4, INT2)") + .is_err()); + + // Extra tokens at end + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) EXTRA" + ) + .is_err()); + + // Incomplete statement + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD") + .is_err()); + + // Very long numbers + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 999999999999999999999 < (INT4, INT2)") + .is_err()); + + // Multiple FOR clauses + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH FOR ORDER BY sort_family") + .is_err()); + + // FOR SEARCH with extra tokens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH EXTRA") + .is_err()); + + // FOR ORDER BY with invalid sort family + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY 123invalid") + .is_err()); + + // Function with empty operator types but missing function args 
parens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 () func") + .is_err()); + + // Function with mismatched parentheses + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4 func(INT2" + ) + .is_err()); + + // DROP with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 ()") + .is_err()); + + // DROP FUNCTION with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 ()") + .is_err()); +} + +#[test] +fn parse_alter_operator_class() { + // Test ALTER OPERATOR CLASS ... RENAME TO + let sql = "ALTER OPERATOR CLASS int_ops USING btree RENAME TO integer_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("integer_ops")]), + }, + }) + ); + + // Test ALTER OPERATOR CLASS ... OWNER TO + let sql = "ALTER OPERATOR CLASS int_ops USING btree OWNER TO joe"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::OwnerTo(Owner::Ident(Ident::new("joe"))), + }) + ); + + // Test ALTER OPERATOR CLASS ... OWNER TO CURRENT_USER + let sql = "ALTER OPERATOR CLASS int_ops USING btree OWNER TO CURRENT_USER"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::OwnerTo(Owner::CurrentUser), + }) + ); + + // Test ALTER OPERATOR CLASS ... 
SET SCHEMA + let sql = "ALTER OPERATOR CLASS int_ops USING btree SET SCHEMA new_schema"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::SetSchema { + schema_name: ObjectName::from(vec![Ident::new("new_schema")]), + }, + }) + ); + + // Test with schema-qualified operator class name + let sql = "ALTER OPERATOR CLASS myschema.int_ops USING btree RENAME TO integer_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("myschema"), Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("integer_ops")]), + }, + }) + ); + + // Test with different index methods + for index_method in &["hash", "gist", "gin", "spgist", "brin"] { + let sql = format!( + "ALTER OPERATOR CLASS int_ops USING {} RENAME TO integer_ops", + index_method + ); + pg_and_generic().verified_stmt(&sql); + } + + // Test error cases + // Missing USING clause + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops RENAME TO integer_ops") + .is_err()); + + // Invalid operation + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree INVALID_OPERATION") + .is_err()); + + // Missing new name for RENAME TO + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree RENAME TO") + .is_err()); + + // Missing owner for OWNER TO + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree OWNER TO") + .is_err()); + + // Missing schema for SET SCHEMA + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree SET SCHEMA") + .is_err()); + + // Invalid new name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree RENAME 
TO 123invalid") + .is_err()); + + // Invalid owner + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree OWNER TO 123invalid") + .is_err()); + + // Invalid schema name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree SET SCHEMA 123invalid") + .is_err()); + + // Missing operator class name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS USING btree RENAME TO integer_ops") + .is_err()); + + // Extra tokens at end + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR CLASS int_ops USING btree RENAME TO integer_ops EXTRA" + ) + .is_err()); + + // Missing index method + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops RENAME TO integer_ops") + .is_err()); + + // Invalid index method + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING 123invalid RENAME TO integer_ops") + .is_err()); + + // Trying to use ADD operation (only valid for OPERATOR FAMILY) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR CLASS int_ops USING btree ADD OPERATOR 1 < (INT4, INT2)" + ) + .is_err()); + + // Trying to use DROP operation (only valid for OPERATOR FAMILY) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR CLASS int_ops USING btree DROP OPERATOR 1 (INT4, INT2)" + ) + .is_err()); +} + +#[test] +fn parse_drop_operator_family() { + for if_exists in [true, false] { + for drop_behavior in [ + None, + Some(DropBehavior::Cascade), + Some(DropBehavior::Restrict), + ] { + for index_method in &["btree", "hash", "gist", "gin", "spgist", "brin"] { + for (names_str, names_vec) in [ + ( + "float_ops", + vec![ObjectName::from(vec![Ident::new("float_ops")])], + ), + ( + "myschema.custom_ops", vec![ObjectName::from(vec![ Ident::new("myschema"), Ident::new("custom_ops"), @@ -7303,3 +8443,350 @@ fn parse_create_operator_class() { ) .is_err()); } + +#[test] +fn parse_identifiers_semicolon_handling() { + let statement = "SHOW search_path; SELECT 1"; + 
pg_and_generic().statements_parse_to(statement, statement); + let statement = "SHOW search_path; SHOW ALL; SHOW ALL"; + pg_and_generic().statements_parse_to(statement, statement); +} + +#[test] +fn parse_create_table_partition_of_range() { + // RANGE partition with FROM ... TO + let sql = "CREATE TABLE measurement_y2006m02 PARTITION OF measurement FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("measurement_y2006m02", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("measurement")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::From { from, to }) => { + assert_eq!(1, from.len()); + assert_eq!(1, to.len()); + match &from[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2006-02-01'", v.to_string()); + } + _ => panic!("Expected Expr value in from"), + } + match &to[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2006-03-01'", v.to_string()); + } + _ => panic!("Expected Expr value in to"), + } + } + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_range_with_minvalue_maxvalue() { + // RANGE partition with MINVALUE/MAXVALUE + let sql = + "CREATE TABLE orders_old PARTITION OF orders FOR VALUES FROM (MINVALUE) TO ('2020-01-01')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_old", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::From { from, to }) => { + assert_eq!(PartitionBoundValue::MinValue, from[0]); + match &to[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2020-01-01'", v.to_string()); + } + _ => panic!("Expected Expr value in to"), + 
} + } + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } + + // With MAXVALUE + let sql = + "CREATE TABLE orders_new PARTITION OF orders FOR VALUES FROM ('2024-01-01') TO (MAXVALUE)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => match create_table.for_values { + Some(ForValues::From { from, to }) => { + match &from[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2024-01-01'", v.to_string()); + } + _ => panic!("Expected Expr value in from"), + } + assert_eq!(PartitionBoundValue::MaxValue, to[0]); + } + _ => panic!("Expected ForValues::From"), + }, + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_list() { + // LIST partition + let sql = "CREATE TABLE orders_us PARTITION OF orders FOR VALUES IN ('US', 'CA', 'MX')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_us", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::In(values)) => { + assert_eq!(3, values.len()); + } + _ => panic!("Expected ForValues::In"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_hash() { + // HASH partition + let sql = "CREATE TABLE orders_p0 PARTITION OF orders FOR VALUES WITH (MODULUS 4, REMAINDER 0)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_p0", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::With { modulus, remainder }) => { + assert_eq!(4, modulus); + assert_eq!(0, remainder); + } + _ => panic!("Expected ForValues::With"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] 
+fn parse_create_table_partition_of_default() { + // DEFAULT partition + let sql = "CREATE TABLE orders_default PARTITION OF orders DEFAULT"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_default", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + assert_eq!(Some(ForValues::Default), create_table.for_values); + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_multicolumn_range() { + // Multi-column RANGE partition + let sql = "CREATE TABLE sales_2023_q1 PARTITION OF sales FOR VALUES FROM ('2023-01-01', 1) TO ('2023-04-01', 1)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("sales_2023_q1", create_table.name.to_string()); + match create_table.for_values { + Some(ForValues::From { from, to }) => { + assert_eq!(2, from.len()); + assert_eq!(2, to.len()); + } + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_with_constraint() { + // With table constraint (not column constraint which has different syntax in PARTITION OF) + let sql = "CREATE TABLE orders_2023 PARTITION OF orders (\ +CONSTRAINT check_date CHECK (order_date >= '2023-01-01')\ +) FOR VALUES FROM ('2023-01-01') TO ('2024-01-01')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_2023", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + // Check that table constraint was parsed + assert_eq!(1, create_table.constraints.len()); + match create_table.for_values { + Some(ForValues::From { .. 
}) => {} + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_errors() { + let sql = "CREATE TABLE p PARTITION OF parent"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("FOR VALUES or DEFAULT"), + "Expected error about FOR VALUES, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent WITH (fillfactor = 70)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("FOR VALUES or DEFAULT"), + "Expected error about FOR VALUES, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES RANGE (1, 10)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("IN, FROM, or WITH"), + "Expected error about invalid keyword after FOR VALUES, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES FROM (1)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("TO"), + "Expected error about missing TO clause, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES IN ()"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("at least one value"), + "Expected error about empty value list in IN clause, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES FROM () TO (10)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("at least one value"), + "Expected error about 
empty FROM list, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES FROM (1) TO ()"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("at least one value"), + "Expected error about empty TO list, got: {err}" + ); +} + +#[test] +fn parse_pg_analyze() { + // Bare ANALYZE + pg_and_generic().verified_stmt("ANALYZE"); + + // ANALYZE with table name + pg_and_generic().verified_stmt("ANALYZE t"); + + // ANALYZE with column specification + pg_and_generic().verified_stmt("ANALYZE t (col1, col2)"); + + // Verify AST for column specification + let stmt = pg().verified_stmt("ANALYZE t (col1, col2)"); + match &stmt { + Statement::Analyze(analyze) => { + assert_eq!(analyze.table_name.as_ref().unwrap().to_string(), "t"); + assert_eq!(analyze.columns.len(), 2); + assert_eq!(analyze.columns[0].to_string(), "col1"); + assert_eq!(analyze.columns[1].to_string(), "col2"); + assert!(!analyze.for_columns); + } + _ => panic!("Expected Analyze, got: {stmt:?}"), + } +} + +#[test] +fn parse_lock_table() { + pg_and_generic().one_statement_parses_to( + "LOCK public.widgets IN EXCLUSIVE MODE", + "LOCK TABLE public.widgets IN EXCLUSIVE MODE", + ); + pg_and_generic().one_statement_parses_to( + "LOCK TABLE public.widgets NOWAIT", + "LOCK TABLE public.widgets NOWAIT", + ); + + let stmt = pg_and_generic().verified_stmt( + "LOCK TABLE ONLY public.widgets, analytics.events * IN SHARE ROW EXCLUSIVE MODE NOWAIT", + ); + match stmt { + Statement::Lock(lock) => { + assert_eq!(lock.tables.len(), 2); + assert_eq!(lock.tables[0].name.to_string(), "public.widgets"); + assert!(lock.tables[0].only); + assert!(!lock.tables[0].has_asterisk); + assert_eq!(lock.tables[1].name.to_string(), "analytics.events"); + assert!(!lock.tables[1].only); + assert!(lock.tables[1].has_asterisk); + assert_eq!(lock.lock_mode, Some(LockTableMode::ShareRowExclusive)); + assert!(lock.nowait); + } + _ 
=> panic!("Expected Lock, got: {stmt:?}"), + } + + let lock_modes = [ + ("ACCESS SHARE", LockTableMode::AccessShare), + ("ROW SHARE", LockTableMode::RowShare), + ("ROW EXCLUSIVE", LockTableMode::RowExclusive), + ( + "SHARE UPDATE EXCLUSIVE", + LockTableMode::ShareUpdateExclusive, + ), + ("SHARE", LockTableMode::Share), + ("SHARE ROW EXCLUSIVE", LockTableMode::ShareRowExclusive), + ("EXCLUSIVE", LockTableMode::Exclusive), + ("ACCESS EXCLUSIVE", LockTableMode::AccessExclusive), + ]; + + for (mode_sql, expected_mode) in lock_modes { + let stmt = pg_and_generic() + .verified_stmt(&format!("LOCK TABLE public.widgets IN {mode_sql} MODE")); + match stmt { + Statement::Lock(lock) => { + assert_eq!(lock.tables.len(), 1); + assert_eq!(lock.tables[0].name.to_string(), "public.widgets"); + assert!(!lock.tables[0].only); + assert!(!lock.tables[0].has_asterisk); + assert_eq!(lock.lock_mode, Some(expected_mode)); + assert!(!lock.nowait); + } + _ => panic!("Expected Lock, got: {stmt:?}"), + } + } +} diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 49cf9cfa85..8c02c78352 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -452,9 +452,68 @@ fn parse_vacuum() { Ident::new("tbl1"), ])) ); - assert_eq!(v.threshold, Some(number("20"))); + assert_eq!(v.threshold, Some(number("20").with_empty_span())); assert!(v.boost); } _ => unreachable!(), } } + +#[test] +fn test_create_table_diststyle_distkey() { + redshift().verified_stmt( + "CREATE TEMPORARY TABLE tmp_sbk_summary_pp DISTSTYLE KEY DISTKEY(bet_id) AS SELECT 1 AS bet_id", + ); +} + +#[test] +fn test_create_table_diststyle() { + redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE AUTO"); + redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE EVEN"); + redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE KEY DISTKEY(c1)"); + redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE ALL"); +} + +#[test] +fn test_copy_credentials() { + 
redshift().verified_stmt( + "COPY t1 FROM 's3://bucket/file.csv' CREDENTIALS 'aws_access_key_id=AK;aws_secret_access_key=SK' CSV", + ); +} + +#[test] +fn test_create_table_sortkey() { + redshift().verified_stmt("CREATE TABLE t1 (c1 INT, c2 INT, c3 TIMESTAMP) SORTKEY(c3)"); + redshift().verified_stmt("CREATE TABLE t1 (c1 INT, c2 INT) SORTKEY(c1, c2)"); +} + +#[test] +fn test_create_table_distkey_sortkey_with_ctas() { + redshift().verified_stmt( + "CREATE TABLE t1 DISTKEY(1) SORTKEY(1, 3) AS SELECT eventid, venueid, dateid, eventname FROM event", + ); +} + +#[test] +fn test_create_table_diststyle_distkey_sortkey() { + redshift().verified_stmt( + "CREATE TABLE t1 (c1 INT, c2 INT) DISTSTYLE KEY DISTKEY(c1) SORTKEY(c1, c2)", + ); +} + +#[test] +fn test_alter_table_alter_sortkey() { + redshift().verified_stmt("ALTER TABLE users ALTER SORTKEY(created_at)"); + redshift().verified_stmt("ALTER TABLE users ALTER SORTKEY(c1, c2)"); +} + +#[test] +fn test_create_table_backup() { + redshift().verified_stmt("CREATE TABLE public.users (id INT, name VARCHAR(255)) BACKUP YES"); + + redshift().verified_stmt("CREATE TABLE staging.events (event_id INT) BACKUP NO"); + + redshift().verified_stmt( + "CREATE TABLE public.users_backup_test BACKUP YES DISTSTYLE AUTO AS SELECT id, name, email FROM public.users", + ); +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 39378af7d0..929c09bf58 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -286,6 +286,32 @@ fn test_snowflake_create_table_with_row_access_policy() { } } +#[test] +fn test_snowflake_create_table_with_storage_lifecycle_policy() { + // WITH keyword + match snowflake().verified_stmt( + "CREATE TABLE IF NOT EXISTS my_table (a NUMBER(38, 0), b VARIANT) WITH STORAGE LIFECYCLE POLICY dba.global_settings.my_policy ON (a)", + ) { + Statement::CreateTable(CreateTable { + name, + with_storage_lifecycle_policy, + .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + let policy = with_storage_lifecycle_policy.unwrap(); + assert_eq!("dba.global_settings.my_policy", policy.policy.to_string()); + assert_eq!(vec![Ident::new("a")], policy.on); + } + _ => unreachable!(), + } + + // Without WITH keyword — canonicalizes to WITH form + snowflake().one_statement_parses_to( + "CREATE TABLE my_table (a NUMBER(38, 0)) STORAGE LIFECYCLE POLICY my_policy ON (a, b)", + "CREATE TABLE my_table (a NUMBER(38, 0)) WITH STORAGE LIFECYCLE POLICY my_policy ON (a, b)", + ); +} + #[test] fn test_snowflake_create_table_with_tag() { match snowflake() @@ -1101,8 +1127,8 @@ fn parse_create_dynamic_table() { " EXTERNAL_VOLUME='my_external_volume'", " CATALOG='SNOWFLAKE'", " BASE_LOCATION='my_iceberg_table'", - " TARGET_LAG='20 minutes'", - " WAREHOUSE=mywh", + " TARGET_LAG='20 minutes'", + " WAREHOUSE=mywh", " AS SELECT product_id, product_name FROM staging_table" )); @@ -1250,6 +1276,7 @@ fn parse_array() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(ArrayElemTypeDef::None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -1265,39 +1292,8 @@ fn parse_lateral_flatten() { // https://docs.snowflake.com/en/user-guide/querying-semistructured #[test] fn parse_semi_structured_data_traversal() { - // most basic case - let sql = "SELECT a:b FROM t"; - let select = snowflake().verified_only_select(sql); - assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - has_colon: true, - path: vec![JsonPathElem::Dot { - key: "b".to_owned(), - quoted: false - }] - }, - }), - select.projection[0] - ); - - // identifier can be quoted - let sql = r#"SELECT a:"my long object key name" FROM t"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - 
path: JsonPath { - has_colon: true, - path: vec![JsonPathElem::Dot { - key: "my long object key name".to_owned(), - quoted: true - }] - }, - }), - select.projection[0] - ); + // see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test + // cases. This test only has Snowflake-specific syntax like array access. // expressions are allowed in bracket notation let sql = r#"SELECT a[2 + 2] FROM t"#; @@ -1319,92 +1315,6 @@ fn parse_semi_structured_data_traversal() { select.projection[0] ); - snowflake().verified_stmt("SELECT a:b::INT FROM t"); - - // unquoted keywords are permitted in the object key - let sql = "SELECT a:select, a:from FROM t"; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![ - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - has_colon: true, - path: vec![JsonPathElem::Dot { - key: "select".to_owned(), - quoted: false - }] - }, - }), - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - has_colon: true, - path: vec![JsonPathElem::Dot { - key: "from".to_owned(), - quoted: false - }] - }, - }) - ], - select.projection - ); - - // multiple levels can be traversed - // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation - let sql = r#"SELECT a:foo."bar".baz"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - has_colon: true, - path: vec![ - JsonPathElem::Dot { - key: "foo".to_owned(), - quoted: false, - }, - JsonPathElem::Dot { - key: "bar".to_owned(), - quoted: true, - }, - JsonPathElem::Dot { - key: "baz".to_owned(), - quoted: false, - } - ] - }, - })], - select.projection - ); - - // dot and bracket notation can be mixed (starting with : case) - // 
https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation - let sql = r#"SELECT a:foo[0].bar"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - has_colon: true, - path: vec![ - JsonPathElem::Dot { - key: "foo".to_owned(), - quoted: false, - }, - JsonPathElem::Bracket { - key: Expr::value(number("0")), - }, - JsonPathElem::Dot { - key: "bar".to_owned(), - quoted: false, - } - ] - }, - })], - select.projection - ); - // dot and bracket notation can be mixed (starting with bracket case) // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation let sql = r#"SELECT a[0].foo.bar"#; @@ -1470,8 +1380,6 @@ fn parse_semi_structured_data_traversal() { Expr::JsonAccess { value: Box::new(Expr::Cast { kind: CastKind::DoubleColon, - data_type: DataType::Array(ArrayElemTypeDef::None), - format: None, expr: Box::new(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { @@ -1481,7 +1389,10 @@ fn parse_semi_structured_data_traversal() { quoted: false }] } - }) + }), + data_type: DataType::Array(ArrayElemTypeDef::None), + array: false, + format: None, }), path: JsonPath { has_colon: false, @@ -1595,7 +1506,9 @@ fn snowflake_and_generic() -> TestedDialects { fn test_select_wildcard_with_exclude() { let select = snowflake_and_generic().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])), + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ObjectName::from( + Ident::new("col_a"), + )])), ..Default::default() }); assert_eq!(expected, select.projection[0]); @@ -1605,7 +1518,9 @@ fn test_select_wildcard_with_exclude() { let expected = SelectItem::QualifiedWildcard( 
SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("name")])), WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), + opt_exclude: Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "department_id", + )))), ..Default::default() }, ); @@ -1615,8 +1530,8 @@ fn test_select_wildcard_with_exclude() { .verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ - Ident::new("department_id"), - Ident::new("employee_id"), + ObjectName::from(Ident::new("department_id")), + ObjectName::from(Ident::new("employee_id")), ])), ..Default::default() }); @@ -1701,7 +1616,9 @@ fn test_select_wildcard_with_exclude_and_rename() { let select = snowflake_and_generic() .verified_only_select("SELECT * EXCLUDE col_z RENAME col_a AS col_b FROM data"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("col_z"))), + opt_exclude: Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "col_z", + )))), opt_rename: Some(RenameSelectItem::Single(IdentWithAlias { ident: Ident::new("col_a"), alias: Ident::new("col_b"), @@ -2132,27 +2049,27 @@ fn test_create_stage_with_stage_params() { ); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_KEY_ID".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "1a2b3c".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("1a2b3c".to_string()).with_empty_span() + ), })); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_SECRET_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "4x5y6z".to_string() - )), + option_value: KeyValueOptionKind::Single( + 
Value::SingleQuotedString("4x5y6z".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "MASTER_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "key".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("key".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "AWS_SSE_KMS".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("AWS_SSE_KMS".to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2176,17 +2093,17 @@ fn test_create_stage_with_directory_table_params() { } => { assert!(directory_table_params.options.contains(&KeyValueOption { option_name: "ENABLE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single(Value::Boolean(true).with_empty_span()), })); assert!(directory_table_params.options.contains(&KeyValueOption { option_name: "REFRESH_ON_CREATE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(false)), + option_value: KeyValueOptionKind::Single(Value::Boolean(false).with_empty_span()), })); assert!(directory_table_params.options.contains(&KeyValueOption { option_name: "NOTIFICATION_INTEGRATION".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "some-string".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("some-string".to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2206,17 +2123,21 @@ fn test_create_stage_with_file_format() { Statement::CreateStage { file_format, .. 
} => { assert!(file_format.options.contains(&KeyValueOption { option_name: "COMPRESSION".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("AUTO".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AUTO".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "BINARY_FORMAT".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("HEX".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("HEX".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "ESCAPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - r#"\\"#.to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString(r#"\\"#.to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2238,13 +2159,13 @@ fn test_create_stage_with_copy_options() { Statement::CreateStage { copy_options, .. 
} => { assert!(copy_options.options.contains(&KeyValueOption { option_name: "ON_ERROR".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "CONTINUE".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("CONTINUE".to_string()).with_empty_span() + ), })); assert!(copy_options.options.contains(&KeyValueOption { option_name: "FORCE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single(Value::Boolean(true).with_empty_span()), })); } _ => unreachable!(), @@ -2375,27 +2296,27 @@ fn test_copy_into_with_stage_params() { ); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_KEY_ID".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "1a2b3c".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("1a2b3c".to_string()).with_empty_span() + ), })); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_SECRET_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "4x5y6z".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("4x5y6z".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "MASTER_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "key".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("key".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "AWS_SSE_KMS".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("AWS_SSE_KMS".to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2546,17 
+2467,21 @@ fn test_copy_into_file_format() { Statement::CopyIntoSnowflake { file_format, .. } => { assert!(file_format.options.contains(&KeyValueOption { option_name: "COMPRESSION".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("AUTO".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AUTO".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "BINARY_FORMAT".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("HEX".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("HEX".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "ESCAPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - r#"\\"#.to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString(r#"\\"#.to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2584,17 +2509,21 @@ fn test_copy_into_file_format() { Statement::CopyIntoSnowflake { file_format, .. 
} => { assert!(file_format.options.contains(&KeyValueOption { option_name: "COMPRESSION".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("AUTO".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AUTO".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "BINARY_FORMAT".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("HEX".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("HEX".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "ESCAPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - r#"\\"#.to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString(r#"\\"#.to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2615,13 +2544,13 @@ fn test_copy_into_copy_options() { Statement::CopyIntoSnowflake { copy_options, .. 
} => { assert!(copy_options.options.contains(&KeyValueOption { option_name: "ON_ERROR".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "CONTINUE".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("CONTINUE".to_string()).with_empty_span() + ), })); assert!(copy_options.options.contains(&KeyValueOption { option_name: "FORCE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single(Value::Boolean(true).with_empty_span()), })); } _ => unreachable!(), @@ -2755,6 +2684,21 @@ fn test_snowflake_copy_into_stage_name_ends_with_parens() { } } +#[test] +fn test_snowflake_stage_name_with_special_chars() { + // Stage path with '=' (Hive-style partitioning) + snowflake().verified_stmt("SELECT * FROM @stage/day=18/23.parquet"); + + // Stage path with ':' (time-based partitioning) + snowflake().verified_stmt("SELECT * FROM @stage/0:18:23/23.parquet"); + + // COPY INTO with '=' in stage path + snowflake().verified_stmt("COPY INTO my_table FROM @stage/day=18/file.parquet"); + + // COPY INTO with ':' in stage path + snowflake().verified_stmt("COPY INTO my_table FROM @stage/0:18:23/file.parquet"); +} + #[test] fn test_snowflake_trim() { let real_sql = r#"SELECT customer_id, TRIM(sub_items.value:item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; @@ -3330,12 +3274,13 @@ fn parse_view_column_descriptions() { #[test] fn test_parentheses_overflow() { - // TODO: increase / improve after we fix the recursion limit - // for real (see https://github.com/apache/datafusion-sqlparser-rs/issues/984) - let max_nesting_level: usize = 25; + // Use a modest nesting level to avoid actual stack overflow on + // CI runners with small thread stacks (debug builds use large frames + // and each nesting level adds extra depth via maybe_parse). + let max_nesting_level: usize = 20; - // Verify the recursion check is not too wasteful... 
(num of parentheses - 2 is acceptable) - let slack = 2; + // Verify the recursion check is not too wasteful (num of parentheses within budget) + let slack = 3; let l_parens = "(".repeat(max_nesting_level - slack); let r_parens = ")".repeat(max_nesting_level - slack); let sql = format!("SELECT * FROM {l_parens}a.b.c{r_parens}"); @@ -3343,8 +3288,8 @@ fn test_parentheses_overflow() { snowflake_with_recursion_limit(max_nesting_level).parse_sql_statements(sql.as_str()); assert_eq!(parsed.err(), None); - // Verify the recursion check triggers... (num of parentheses - 1 is acceptable) - let slack = 1; + // Verify the recursion check triggers (one more paren exceeds the budget) + let slack = 2; let l_parens = "(".repeat(max_nesting_level - slack); let r_parens = ")".repeat(max_nesting_level - slack); let sql = format!("SELECT * FROM {l_parens}a.b.c{r_parens}"); @@ -3534,6 +3479,344 @@ fn test_table_sample() { snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) SEED (1)"); } +#[test] +fn test_subquery_sample() { + // Test SAMPLE clause on subqueries (derived tables) + snowflake_and_generic().verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10000 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) AS t SAMPLE (50 PERCENT)"); + // Nested subquery with SAMPLE + snowflake_and_generic().verified_stmt( + "SELECT * FROM (SELECT * FROM (SELECT report_from FROM mytable) SAMPLE (10000 ROWS)) AS anon_1", + ); + // SAMPLE with SEED on subquery + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10) SEED (42)"); +} + +#[test] +fn test_multi_table_insert_unconditional() { + // Basic unconditional multi-table insert + // See: https://docs.snowflake.com/en/sql-reference/sql/insert-multi-table + snowflake().verified_stmt("INSERT ALL INTO t1 SELECT n1, n2, n3 FROM src"); + + // 
Multiple INTO clauses + snowflake().verified_stmt("INSERT ALL INTO t1 INTO t2 SELECT n1, n2, n3 FROM src"); + + // With column list + snowflake().verified_stmt("INSERT ALL INTO t1 (c1, c2, c3) SELECT n1, n2, n3 FROM src"); + + // With VALUES clause + snowflake().verified_stmt( + "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n2, n1, DEFAULT) SELECT n1, n2, n3 FROM src", + ); + + // Complex example from Snowflake docs + snowflake().verified_stmt( + "INSERT ALL INTO t1 INTO t1 (c1, c2, c3) VALUES (n2, n1, DEFAULT) INTO t2 (c1, c2, c3) INTO t2 VALUES (n3, n2, n1) SELECT n1, n2, n3 FROM src" + ); + + // With OVERWRITE + snowflake().verified_stmt("INSERT OVERWRITE ALL INTO t1 INTO t2 SELECT n1, n2, n3 FROM src"); +} + +#[test] +fn test_multi_table_insert_conditional() { + // Basic conditional multi-table insert with WHEN clause + // See: https://docs.snowflake.com/en/sql-reference/sql/insert-multi-table + snowflake().verified_stmt("INSERT ALL WHEN n1 > 100 THEN INTO t1 SELECT n1 FROM src"); + + // Multiple WHEN clauses + snowflake().verified_stmt( + "INSERT ALL WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t2 SELECT n1 FROM src", + ); + + // WHEN with multiple INTO clauses + snowflake().verified_stmt("INSERT ALL WHEN n1 > 10 THEN INTO t1 INTO t2 SELECT n1 FROM src"); + + // With ELSE clause + snowflake() + .verified_stmt("INSERT ALL WHEN n1 > 100 THEN INTO t1 ELSE INTO t2 SELECT n1 FROM src"); + + // Complex conditional insert from Snowflake docs + snowflake().verified_stmt( + "INSERT ALL WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t1 INTO t2 ELSE INTO t2 SELECT n1 FROM src" + ); + + // INSERT FIRST - only first matching WHEN clause executes + snowflake().verified_stmt( + "INSERT FIRST WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t1 INTO t2 ELSE INTO t2 SELECT n1 FROM src" + ); + + // With OVERWRITE + snowflake().verified_stmt( + "INSERT OVERWRITE ALL WHEN n1 > 100 THEN INTO t1 ELSE INTO t2 SELECT n1 FROM src", + ); + + // WHEN with always-true condition + 
snowflake().verified_stmt("INSERT ALL WHEN 1 = 1 THEN INTO t1 SELECT n1 FROM src"); +} + +#[test] +fn test_multi_table_insert_with_values() { + // INTO clause with VALUES using column references + snowflake().verified_stmt("INSERT ALL INTO t1 VALUES (n1, n2) SELECT n1, n2 FROM src"); + + // INTO clause with VALUES using DEFAULT + snowflake().verified_stmt( + "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n1, n2, DEFAULT) SELECT n1, n2 FROM src", + ); + + // INTO clause with VALUES using NULL + snowflake().verified_stmt( + "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n1, NULL, n2) SELECT n1, n2 FROM src", + ); + + // Positional alias in VALUES + snowflake().verified_stmt("INSERT ALL INTO t1 VALUES ($1, $2) SELECT 1, 50 AS an_alias"); +} + +/// Unit tests for multi-table INSERT AST structure validation +#[test] +fn test_multi_table_insert_ast_unconditional() { + // Test basic unconditional multi-table insert AST + let sql = "INSERT ALL INTO t1 INTO t2 (c1, c2) SELECT n1, n2 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_insert_type, + overwrite, + multi_table_into_clauses, + multi_table_when_clauses, + multi_table_else_clause, + source, + .. 
+ }) => { + // Should be INSERT ALL (not FIRST) + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::All)); + assert!(!overwrite); + + // Should have 2 INTO clauses + assert_eq!(multi_table_into_clauses.len(), 2); + + // First INTO clause: INTO t1 + assert_eq!(multi_table_into_clauses[0].table_name.to_string(), "t1"); + assert!(multi_table_into_clauses[0].columns.is_empty()); + assert!(multi_table_into_clauses[0].values.is_none()); + + // Second INTO clause: INTO t2 (c1, c2) + assert_eq!(multi_table_into_clauses[1].table_name.to_string(), "t2"); + assert_eq!(multi_table_into_clauses[1].columns.len(), 2); + assert_eq!(multi_table_into_clauses[1].columns[0].to_string(), "c1"); + assert_eq!(multi_table_into_clauses[1].columns[1].to_string(), "c2"); + assert!(multi_table_into_clauses[1].values.is_none()); + + // No WHEN clauses for unconditional insert + assert!(multi_table_when_clauses.is_empty()); + assert!(multi_table_else_clause.is_none()); + + // Should have source query + assert!(source.is_some()); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_with_values() { + // Test INTO clause with VALUES + let sql = "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n1, n2, DEFAULT) SELECT n1, n2 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_into_clauses, + .. 
+ }) => { + assert_eq!(multi_table_into_clauses.len(), 1); + + let into_clause = &multi_table_into_clauses[0]; + assert_eq!(into_clause.table_name.to_string(), "t1"); + assert_eq!(into_clause.columns.len(), 3); + + // Check VALUES clause + let values = into_clause.values.as_ref().expect("Expected VALUES clause"); + assert_eq!(values.values.len(), 3); + + // First value: n1 (expression) + match &values.values[0] { + MultiTableInsertValue::Expr(expr) => { + assert_eq!(expr.to_string(), "n1"); + } + _ => panic!("Expected Expr"), + } + + // Second value: n2 (expression) + match &values.values[1] { + MultiTableInsertValue::Expr(expr) => { + assert_eq!(expr.to_string(), "n2"); + } + _ => panic!("Expected Expr"), + } + + // Third value: DEFAULT + match &values.values[2] { + MultiTableInsertValue::Default => {} + _ => panic!("Expected DEFAULT"), + } + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_conditional() { + // Test conditional multi-table insert with WHEN clauses + let sql = "INSERT ALL WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t2 INTO t3 ELSE INTO t4 SELECT n1 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_insert_type, + multi_table_into_clauses, + multi_table_when_clauses, + multi_table_else_clause, + .. 
+ }) => { + // Should be INSERT ALL (not FIRST) + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::All)); + + // Unconditional INTO clauses should be empty for conditional insert + assert!(multi_table_into_clauses.is_empty()); + + // Should have 2 WHEN clauses + assert_eq!(multi_table_when_clauses.len(), 2); + + // First WHEN clause: WHEN n1 > 100 THEN INTO t1 + assert_eq!( + multi_table_when_clauses[0].condition.to_string(), + "n1 > 100" + ); + assert_eq!(multi_table_when_clauses[0].into_clauses.len(), 1); + assert_eq!( + multi_table_when_clauses[0].into_clauses[0] + .table_name + .to_string(), + "t1" + ); + + // Second WHEN clause: WHEN n1 > 10 THEN INTO t2 INTO t3 + assert_eq!(multi_table_when_clauses[1].condition.to_string(), "n1 > 10"); + assert_eq!(multi_table_when_clauses[1].into_clauses.len(), 2); + assert_eq!( + multi_table_when_clauses[1].into_clauses[0] + .table_name + .to_string(), + "t2" + ); + assert_eq!( + multi_table_when_clauses[1].into_clauses[1] + .table_name + .to_string(), + "t3" + ); + + // ELSE clause: ELSE INTO t4 + let else_clause = multi_table_else_clause.expect("Expected ELSE clause"); + assert_eq!(else_clause.len(), 1); + assert_eq!(else_clause[0].table_name.to_string(), "t4"); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_first() { + // Test INSERT FIRST vs INSERT ALL + let sql = + "INSERT FIRST WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t2 SELECT n1 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_insert_type, + multi_table_when_clauses, + .. 
+ }) => { + // Should be INSERT FIRST + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::First)); + assert_eq!(multi_table_when_clauses.len(), 2); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_overwrite() { + // Test INSERT OVERWRITE ALL + let sql = "INSERT OVERWRITE ALL INTO t1 INTO t2 SELECT n1 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + overwrite, + multi_table_insert_type, + multi_table_into_clauses, + .. + }) => { + assert!(overwrite); + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::All)); + assert_eq!(multi_table_into_clauses.len(), 2); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_complex_values() { + // Test complex VALUES with expressions + let sql = "INSERT ALL INTO t1 VALUES (n1 + n2, n3 * 2, DEFAULT) SELECT n1, n2, n3 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_into_clauses, + .. + }) => { + assert_eq!(multi_table_into_clauses.len(), 1); + + let values = multi_table_into_clauses[0] + .values + .as_ref() + .expect("Expected VALUES"); + assert_eq!(values.values.len(), 3); + + // First value: n1 + n2 (binary expression) + match &values.values[0] { + MultiTableInsertValue::Expr(Expr::BinaryOp { op, .. }) => { + assert_eq!(*op, BinaryOperator::Plus); + } + _ => panic!("Expected BinaryOp expression"), + } + + // Second value: n3 * 2 (binary expression) + match &values.values[1] { + MultiTableInsertValue::Expr(Expr::BinaryOp { op, .. 
}) => { + assert_eq!(*op, BinaryOperator::Multiply); + } + _ => panic!("Expected BinaryOp expression"), + } + + // Third value: DEFAULT + assert!(matches!(&values.values[2], MultiTableInsertValue::Default)); + } + _ => panic!("Expected INSERT statement"), + } +} + #[test] fn parse_ls_and_rm() { snowflake().one_statement_parses_to("LS @~", "LIST @~"); @@ -3749,6 +4032,32 @@ fn test_timetravel_at_before() { .verified_only_select("SELECT * FROM tbl BEFORE(TIMESTAMP => '2024-12-15 00:00:00')"); } +#[test] +fn test_changes_clause() { + // CHANGES with AT and END + snowflake().verified_stmt( + r#"SELECT a FROM "PCH_ODS_FIDELIO"."SRC_VW_SYS_ACC_MASTER" CHANGES(INFORMATION => DEFAULT) AT(TIMESTAMP => TO_TIMESTAMP_TZ('2026-02-18 11:23:19.660000000')) END(TIMESTAMP => TO_TIMESTAMP_TZ('2026-02-18 11:38:30.211000000'))"#, + ); + + // CHANGES with AT only (no END) + snowflake().verified_stmt( + "SELECT a FROM t CHANGES(INFORMATION => DEFAULT) AT(TIMESTAMP => TO_TIMESTAMP_TZ('2026-02-18 11:23:19.660000000'))", + ); + + // CHANGES with APPEND_ONLY + snowflake().verified_stmt( + "SELECT a FROM t CHANGES(INFORMATION => APPEND_ONLY) AT(TIMESTAMP => TO_TIMESTAMP_TZ('2026-01-01 00:00:00'))", + ); + + // CHANGES with OFFSET + snowflake().verified_stmt("SELECT a FROM t CHANGES(INFORMATION => DEFAULT) AT(OFFSET => -60)"); + + // CHANGES with STATEMENT + snowflake().verified_stmt( + "SELECT a FROM t CHANGES(INFORMATION => DEFAULT) AT(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')", + ); +} + #[test] fn test_grant_account_global_privileges() { let privileges = vec![ @@ -4396,6 +4705,27 @@ END assert_eq!(2, exception[1].statements.len()); } +#[test] +fn test_begin_transaction() { + snowflake().verified_stmt("BEGIN TRANSACTION"); + snowflake().verified_stmt("BEGIN WORK"); + + // BEGIN TRANSACTION with statements + let stmts = snowflake() + .parse_sql_statements("BEGIN TRANSACTION; DROP TABLE IF EXISTS bla; COMMIT") + .unwrap(); + assert_eq!(3, stmts.len()); + + // Bare BEGIN (no 
TRANSACTION keyword) with statements + let stmts = snowflake() + .parse_sql_statements("BEGIN; DROP TABLE IF EXISTS bla; COMMIT") + .unwrap(); + assert_eq!(3, stmts.len()); + + // Bare BEGIN at EOF (no semicolon, no TRANSACTION keyword) + snowflake().verified_stmt("BEGIN"); +} + #[test] fn test_snowflake_fetch_clause_syntax() { let canonical = "SELECT c1 FROM fetch_test FETCH FIRST 2 ROWS ONLY"; @@ -4436,6 +4766,17 @@ fn test_snowflake_create_view_with_composite_policy_name() { snowflake().verified_stmt(create_view_with_tag); } +#[test] +fn test_snowflake_create_view_copy_grants() { + snowflake().verified_stmt("CREATE OR REPLACE VIEW bla COPY GRANTS AS (SELECT * FROM source)"); + snowflake() + .verified_stmt("CREATE OR REPLACE SECURE VIEW bla COPY GRANTS AS (SELECT * FROM source)"); + // COPY GRANTS with column list + snowflake().verified_stmt( + "CREATE OR REPLACE VIEW bla COPY GRANTS (a, b) AS (SELECT a, b FROM source)", + ); +} + #[test] fn test_snowflake_identifier_function() { // Using IDENTIFIER to reference a column @@ -4654,3 +4995,31 @@ fn test_alter_dynamic_table() { snowflake().verified_stmt("ALTER DYNAMIC TABLE my_dyn_table SUSPEND"); snowflake().verified_stmt("ALTER DYNAMIC TABLE my_dyn_table RESUME"); } + +#[test] +fn test_alter_external_table() { + snowflake().verified_stmt("ALTER EXTERNAL TABLE some_table REFRESH"); + snowflake().verified_stmt("ALTER EXTERNAL TABLE some_table REFRESH 'year=2025/month=12/'"); + snowflake().verified_stmt("ALTER EXTERNAL TABLE IF EXISTS some_table REFRESH"); + snowflake() + .verified_stmt("ALTER EXTERNAL TABLE IF EXISTS some_table REFRESH 'year=2025/month=12/'"); +} + +#[test] +fn test_truncate_table_if_exists() { + snowflake().verified_stmt("TRUNCATE TABLE IF EXISTS my_table"); + snowflake().verified_stmt("TRUNCATE TABLE my_table"); + snowflake().verified_stmt("TRUNCATE IF EXISTS my_table"); +} + +#[test] +fn test_select_dollar_column_from_stage() { + // With table function args and alias + 
snowflake().verified_stmt("SELECT t.$1, t.$2 FROM @mystage1(file_format => 'myformat', pattern => '.*data.*[.]csv.gz') t"); + // Without table function args, with alias + snowflake().verified_stmt("SELECT t.$1, t.$2 FROM @mystage1 t"); + // Without table function args, without alias + snowflake().verified_stmt("SELECT $1, $2 FROM @mystage1"); + // With table function args, without alias + snowflake().verified_stmt("SELECT $1, $2 FROM @mystage1(file_format => 'myformat')"); +} diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 321cfef073..33c38fb0a6 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -477,6 +477,7 @@ fn parse_update_tuple_row_values() { assert_eq!( sqlite().verified_stmt("UPDATE x SET (a, b) = (1, 2)"), Statement::Update(Update { + optimizer_hints: vec![], or: None, assignments: vec![Assignment { target: AssignmentTarget::Tuple(vec![ @@ -495,6 +496,7 @@ fn parse_update_tuple_row_values() { }, from: None, returning: None, + output: None, limit: None, update_token: AttachedToken::empty() }) @@ -601,6 +603,10 @@ fn test_regexp_operator() { } ); sqlite().verified_only_select(r#"SELECT count(*) FROM messages WHERE msg_text REGEXP '\d+'"#); + + // Should return an error, not panic + assert!(sqlite().parse_sql_statements("SELECT 1 REGEXP").is_err()); + assert!(sqlite().parse_sql_statements("SELECT 1 MATCH").is_err()); } #[test]