diff --git a/src/sql-parser/src/ast/defs/expr.rs b/src/sql-parser/src/ast/defs/expr.rs index a2d3dbe614b42..f19048d092636 100644 --- a/src/sql-parser/src/ast/defs/expr.rs +++ b/src/sql-parser/src/ast/defs/expr.rs @@ -220,36 +220,50 @@ impl AstDisplay for Expr { Expr::Parameter(n) => f.write_str(&format!("${}", n)), Expr::Not { expr } => { f.write_str("NOT "); - f.write_node(expr); + // `NOT` binds tighter than `AND`/`OR`, so an operand exposing a + // looser operator on its left spine (`NOT (a OR b)`) must keep its + // parens once `Nested` is stripped. We use `left_edge`, since the + // `NOT` sits to the operand's left. + write_binary_operand(f, expr, left_edge(expr) < prec::NOT); } Expr::And { left, right } => { - f.write_node(left); + write_binary_operand(f, left, right_edge(left) < prec::AND); f.write_str(" AND "); - f.write_node(right); + write_binary_operand(f, right, left_edge(right) <= prec::AND); } Expr::Or { left, right } => { - f.write_node(left); + write_binary_operand(f, left, right_edge(left) < prec::OR); f.write_str(" OR "); - f.write_node(right); + write_binary_operand(f, right, left_edge(right) <= prec::OR); } Expr::IsExpr { expr, negated, construct, } => { - f.write_node(&expr); + write_binary_operand(f, expr, right_edge(expr) < prec::IS); f.write_str(" IS "); if *negated { f.write_str("NOT "); } - f.write_node(construct); + // `IS DISTINCT FROM ` parses the RHS at the `IS` precedence + // (see `Parser::parse_is`), so a RHS whose left spine binds at or + // below `IS` re-associates out of the `IS` unless parenthesized + // (`a IS DISTINCT FROM b OR c` is `(a IS DISTINCT FROM b) OR c`). + // The other constructs (`NULL`/`TRUE`/…) are bare keywords. + if let IsExprConstruct::DistinctFrom(rhs) = construct { + f.write_str("DISTINCT FROM "); + write_binary_operand(f, rhs, left_edge(rhs) <= prec::IS); + } else { + f.write_node(construct); + } } Expr::InList { expr, list, negated, } => { - f.write_node(&expr); + write_binary_operand(f, expr, right_edge(expr) < prec::LIKE); f.write_str(" "); if *negated { f.write_str("NOT "); @@ -263,7 +277,7 @@ impl AstDisplay for Expr { subquery, negated, } => { - f.write_node(&expr); + write_binary_operand(f, expr, right_edge(expr) < prec::LIKE); f.write_str(" "); if *negated { f.write_str("NOT "); @@ -279,7 +293,7 @@ impl AstDisplay for Expr { case_insensitive, negated, } => { - f.write_node(&expr); + write_binary_operand(f, expr, right_edge(expr) < prec::LIKE); f.write_str(" "); if *negated { f.write_str("NOT "); @@ -288,10 +302,21 @@ impl AstDisplay for Expr { f.write_str("I"); } f.write_str("LIKE "); - f.write_node(&pattern); + // The pattern and escape parse at `Like` precedence and sit to the + // right of the keyword, so an operand exposing a precedence at or + // below `Like` on its left spine (e.g. an `IN`/`LIKE`/`BETWEEN` at + // equal precedence) re-associates unless parenthesized. + // `a LIKE b IN (q)` parses as `(a LIKE b) IN (q)`. When an `ESCAPE` + // follows, the pattern is *also* immediately left of `ESCAPE`: a + // `[I]LIKE` exposed on the pattern's right spine would steal the + // `ESCAPE` as its own (`a LIKE NOT b LIKE c ESCAPE d` parses the + // escape onto the inner `b LIKE c`), so guard the right edge too. + let pattern_parens = left_edge(pattern) <= prec::LIKE + || (escape.is_some() && right_edge(pattern) <= prec::LIKE); + write_binary_operand(f, pattern, pattern_parens); if let Some(escape) = escape { f.write_str(" ESCAPE "); - f.write_node(escape); + write_binary_operand(f, escape, left_edge(escape) <= prec::LIKE); } } Expr::Between { @@ -300,22 +325,34 @@ impl AstDisplay for Expr { low, high, } => { - f.write_node(&expr); + // The subject is the LHS of the `BETWEEN` infix (parsed at + // `Like`). A spine exposed at or below `Like` on its right would + // pull `BETWEEN` inside it (`a OR b BETWEEN …` is `a OR (b BETWEEN + // …)`), so parenthesize via `right_edge`. Parser ASTs wrap such a + // subject in `Nested` (`ATOM`), so they're unaffected. + write_binary_operand(f, expr, right_edge(expr) < prec::LIKE); if *negated { f.write_str(" NOT"); } f.write_str(" BETWEEN "); - f.write_node(&low); + write_between_bound(f, low); f.write_str(" AND "); - f.write_node(&high); + write_between_bound(f, high); } Expr::Op { op, expr1, expr2 } => { if let Some(expr2) = expr2 { - f.write_node(&expr1); + // Binary operators are left-associative: parenthesize an + // operand that would re-associate once `Nested` is stripped. + // The left by its `right_edge` (strictly looser than `op`), the + // right by its `left_edge` (equal-or-looser, as equal + // re-associates left). See those helpers for the spine + // reasoning. + let p = binary_op_precedence(op); + write_binary_operand(f, expr1, right_edge(expr1) < p); f.write_str(" "); f.write_str(op); f.write_str(" "); - f.write_node(&expr2); + write_binary_operand(f, expr2, left_edge(expr2) <= p); } else { f.write_str(op); f.write_str(" "); @@ -334,7 +371,20 @@ impl AstDisplay for Expr { } } Expr::Cast { expr, data_type } => { - f.write_node(&expr); + // `::` binds very tightly, so a non-self-delimiting operand must + // be parenthesized or the cast re-associates into its spine. + // `CAST(-0 AS int4)` (i.e. `Cast(- 0)`) would otherwise print as + // `- 0::int4` and reparse as `- (0::int4)`. The parser wraps such + // operands in `Expr::Nested`, but `normalize` strips those, so the + // printer must re-add them (mirroring the `Collate` arm). `Nested` + // is itself self-delimiting, so parser-produced ASTs don't double up. + if prints_self_delimiting(expr) { + f.write_node(&expr); + } else { + f.write_str("("); + f.write_node(&expr); + f.write_str(")"); + } f.write_str("::"); f.write_node(data_type); } @@ -416,7 +466,7 @@ impl AstDisplay for Expr { f.write_str(")"); } Expr::AnySubquery { left, op, right } => { - write_quantified_left(f, left); + write_quantified_left(f, left, op); f.write_str(" "); f.write_str(op); f.write_str(" ANY ("); @@ -424,7 +474,7 @@ impl AstDisplay for Expr { f.write_str(")"); } Expr::AnyExpr { left, op, right } => { - write_quantified_left(f, left); + write_quantified_left(f, left, op); f.write_str(" "); f.write_str(op); f.write_str(" ANY ("); @@ -432,7 +482,7 @@ impl AstDisplay for Expr { f.write_str(")"); } Expr::AllSubquery { left, op, right } => { - write_quantified_left(f, left); + write_quantified_left(f, left, op); f.write_str(" "); f.write_str(op); f.write_str(" ALL ("); @@ -440,7 +490,7 @@ impl AstDisplay for Expr { f.write_str(")"); } Expr::AllExpr { left, op, right } => { - write_quantified_left(f, left); + write_quantified_left(f, left, op); f.write_str(" "); f.write_str(op); f.write_str(" ALL ("); @@ -505,14 +555,28 @@ impl_display_t!(Expr); /// the lexer and parser greedily extend adjacent tokens: `1.x` tokenizes the /// number `1.` and leaves `x` as an alias, and `'a'::T.x` consumes `T.x` as a /// qualified type name. The whitelist below covers receivers that print as -/// self-terminating syntax (identifiers, parenthesized exprs, function calls, -/// bracketed collections, etc.); anything else gets explicit parens. +/// self-terminating syntax (parenthesized exprs, function calls, bracketed +/// collections, etc.). Anything else gets explicit parens. +/// +/// A bare `Identifier`/`QualifiedWildcard` receiver is *not* safe: `a` then +/// `.b`/`.*` prints as `a.b`/`a.*`, which reparses as the qualified identifier +/// `Identifier([a, b])` / `QualifiedWildcard([a])` rather than a field/wildcard +/// access. The parser only ever builds those accesses over a parenthesized +/// receiver (`(a).b`), so it wraps the name in `Expr::Nested`. A bare name here +/// is a `Nested`-stripped AST and must be re-parenthesized. A `FieldAccess` / +/// `WildcardAccess` receiver *is* safe, because its own printing already +/// parenthesizes a bare-name base (`(a).b.c`), so the chain stays self-delimiting. +/// +/// The quantified-subquery forms (`AnySubquery`/`AllSubquery`, printed +/// ` ANY ()`) are likewise *not* safe: they end in a `(query)` +/// that is only a sub-part, so a trailing `.x`/`.*` binds to that inner subquery +/// rather than the whole expression. (Contrast `Subquery`/`ArraySubquery`/… which +/// are a single `(…)`/`ARRAY(…)` primary, so a trailing dot attaches to the whole +/// thing.) fn write_dot_receiver(f: &mut AstFormatter, expr: &Expr) { let safe = matches!( expr, - Expr::Identifier(_) - | Expr::QualifiedWildcard(_) - | Expr::FieldAccess { .. } + Expr::FieldAccess { .. } | Expr::WildcardAccess(_) | Expr::Parameter(_) | Expr::Nested(_) @@ -521,8 +585,6 @@ fn write_dot_receiver(f: &mut AstFormatter, expr: | Expr::Case { .. } | Expr::Exists(_) | Expr::Subquery(_) - | Expr::AnySubquery { .. } - | Expr::AllSubquery { .. } | Expr::Array(_) | Expr::ArraySubquery(_) | Expr::List(_) @@ -549,29 +611,27 @@ fn write_dot_receiver(f: &mut AstFormatter, expr: } } -/// Write `left` as the LHS of ` ANY/ALL (...)`. The operator on -/// the right is a binary infix op whose precedence will reach inside an -/// already-low-precedence lhs (`Like`, `In*`, `Between`, `Is*`, `And`, -/// `Or`, `Not`, nested `AnyExpr`/`AllExpr`) and reparse it as part of the -/// quantified expression's left rather than as a wrapper around it. -/// Parenthesize those; let normal infix `Op` use its existing precedence -/// to print without parens. -fn write_quantified_left(f: &mut AstFormatter, expr: &Expr) { - let needs_parens = matches!( - expr, - Expr::Like { .. } - | Expr::Between { .. } - | Expr::IsExpr { .. } - | Expr::InList { .. } - | Expr::InSubquery { .. } - | Expr::And { .. } - | Expr::Or { .. } - | Expr::Not { .. } - | Expr::AnyExpr { .. } - | Expr::AllExpr { .. } - | Expr::AnySubquery { .. } - | Expr::AllSubquery { .. } - ); +/// Write `left` as the LHS of ` ANY/ALL (...)`. The printed `` is an +/// ordinary binary infix. It can be any operator the parser accepts here, from +/// `=`/`<` (`Cmp`) all the way down to `*`/`/`/`%` (`MultiplyDivide`), and on +/// reparse it binds into any operator exposed on `left`'s right spine that is +/// *strictly looser* than `` itself, stealing that suffix into the quantified +/// expression's left rather than wrapping the whole `left`. So parenthesize +/// exactly when `left`'s [`right_edge`] binds looser than ``'s own precedence +/// ([`binary_op_precedence`]), mirroring the binary-`Op` arm. Using the operator's +/// real precedence (not a fixed `Like` threshold) both parenthesizes a +/// tighter-binding `` over a looser left and leaves an equal-or-tighter left +/// bare (`a = b = ANY (…)`, `a LIKE b = ANY (…)`), which the old fixed threshold +/// over-parenthesized. The tighter-binding case, `(a + b) * ANY (…)`, would +/// otherwise print `a + b * ANY (…)` and reparse as the different +/// `a + (b * ANY (…))`. [`right_edge`] also sees a looser spine hidden under +/// right-transparent prefixes, e.g. the `NOT`'s `IN` in `- NOT a IN (b) = ANY (…)`. +fn write_quantified_left( + f: &mut AstFormatter, + expr: &Expr, + op: &Op, +) { + let needs_parens = right_edge(expr) < binary_op_precedence(op); if needs_parens { f.write_str("("); f.write_node(expr); @@ -581,6 +641,191 @@ fn write_quantified_left(f: &mut AstFormatter, exp } } +/// Write `bound` as a `BETWEEN … AND …` bound. The parser parses both bounds with +/// `parse_subexpr(Precedence::Like)` (see `Parser::parse_between`), starting fresh +/// with nothing to the bound's left, so it walks the bound's *left spine* and +/// stops at the first operator binding at or below `Like`, leaving that operator +/// outside the bound (`x BETWEEN 1 IS NULL AND y` parses `1` as the bound, then +/// expects `AND` but finds `IS`). A bound is therefore safe bare only when its +/// left edge binds strictly above `Like`. Use [`left_edge`] (not [`right_edge`], +/// which closes at `ATOM` for the right-closing `IS NULL`/`= ANY (…)`/`IN (…)` +/// forms whose looseness is on the left). The parser wraps these bounds in +/// `Expr::Nested` (which is `ATOM`, so it prints bare). This re-adds the parens +/// for ASTs where that wrapper is absent. +fn write_between_bound(f: &mut AstFormatter, bound: &Expr) { + let needs_parens = left_edge(bound) <= prec::LIKE; + if needs_parens { + f.write_str("("); + f.write_node(bound); + f.write_str(")"); + } else { + f.write_node(bound); + } +} + +/// Output-precedence ranks, derived directly from the parser's [`Precedence`] +/// ladder (higher binds tighter) so it stays the single source of truth: +/// reordering or inserting a parser level reranks these automatically, and only +/// the variant each rank maps to is maintained by hand. They classify the *top* +/// operator an expr prints with, so the binary-operator printer can parenthesize +/// an operand that would otherwise re-associate on reparse. `ATOM`, the one rank +/// with no parser counterpart, is layered one above the tightest parser level to +/// mark the self-delimiting primaries. They never need parens. +/// +/// [`Precedence`]: crate::parser::Precedence +// `Precedence` is a fieldless enum with a handful of variants, so reading each +// discriminant with `as u8` is exact and lossless. +#[allow(clippy::as_conversions)] +mod prec { + use crate::parser::Precedence; + + pub const OR: u8 = Precedence::Or as u8; + pub const AND: u8 = Precedence::And as u8; + pub const NOT: u8 = Precedence::PrefixNot as u8; + pub const IS: u8 = Precedence::Is as u8; + pub const CMP: u8 = Precedence::Cmp as u8; + pub const LIKE: u8 = Precedence::Like as u8; + pub const OTHER: u8 = Precedence::Other as u8; + pub const PLUS_MINUS: u8 = Precedence::PlusMinus as u8; + pub const MULTIPLY_DIVIDE: u8 = Precedence::MultiplyDivide as u8; + // The `COLLATE` and postfix (`::`/`[…]`) parser levels live between + // `MULTIPLY_DIVIDE` and `ATOM`, but neither edge function ever *returns* them: + // those forms are self-delimiting (their own operand is parenthesized when it + // isn't), so both their edges rank `ATOM`. Kept for parity with the ladder. + #[allow(dead_code)] + pub const COLLATE: u8 = Precedence::PostfixCollateAt as u8; + pub const PREFIX: u8 = Precedence::PrefixPlusMinus as u8; + #[allow(dead_code)] + pub const POSTFIX: u8 = Precedence::PostfixSubscriptCast as u8; + pub const ATOM: u8 = Precedence::PostfixSubscriptCast as u8 + 1; +} + +/// The precedence of a binary operator, mirroring `Parser::get_next_precedence`. +/// A namespaced `OPERATOR(...)` binds at `OTHER`, like the parser. +fn binary_op_precedence(op: &Op) -> u8 { + if op.namespace.is_some() { + return prec::OTHER; + } + match op.op.as_str() { + "=" | "<" | "<=" | "<>" | "!=" | ">" | ">=" => prec::CMP, + "+" | "-" => prec::PLUS_MINUS, + "*" | "/" | "%" => prec::MULTIPLY_DIVIDE, + _ => prec::OTHER, + } +} + +/// The precedence at which a prefix operator (`Op` with no second operand) +/// parses its operand, mirroring `Parser::parse_prefix`: `-`/`+` at +/// `PrefixPlusMinus`, but `~` (and namespaced prefixes) at `Other`, so `~ a + b` +/// parses as `~ (a + b)`. `~` binds looser than `+`/`-`/`*`. +fn unary_prec(op: &Op) -> u8 { + if op.namespace.is_none() && (op.op == "-" || op.op == "+") { + prec::PREFIX + } else { + prec::OTHER + } +} + +/// The loosest precedence exposed on `expr`'s *right spine*, the precedence at +/// which an operator printed immediately to its right would bind *into* it +/// rather than wrap it. For a left operand / subject of a construct that prints +/// to its right, this is what decides parenthesization (its mirror, [`left_edge`], +/// decides right operands), because a prefix operator and the right operand of a +/// binary/`BETWEEN`/`LIKE`/`IS DISTINCT FROM` are right-transparent: +/// `- NOT a IN (b)` exposes the `NOT`'s `IN` on the right even though its top node +/// is unary `-`. Forms that close with a bracket on the right (`(…)`, `[…]`, +/// `::type`, `IS NULL`) are `ATOM`. +fn right_edge(expr: &Expr) -> u8 { + match expr { + // Right-transparent binary infixes: an operator tighter than this one + // binds into the right operand, which itself may expose a looser spine. + Expr::Or { right, .. } => prec::OR.min(right_edge(right)), + Expr::And { right, .. } => prec::AND.min(right_edge(right)), + Expr::Op { + op, expr2: Some(r), .. + } => binary_op_precedence(op).min(right_edge(r)), + // Prefix operators expose their operand's right spine. + Expr::Op { + op, + expr1, + expr2: None, + } => unary_prec(op).min(right_edge(expr1)), + Expr::Not { expr } => prec::NOT.min(right_edge(expr)), + // `IS DISTINCT FROM x` exposes `x`, while `IS NULL`/`TRUE`/… close. + Expr::IsExpr { + construct: IsExprConstruct::DistinctFrom(x), + .. + } => prec::IS.min(right_edge(x)), + // `… BETWEEN low AND high` exposes `high`. `… [I]LIKE pat [ESCAPE esc]` + // exposes the rightmost of `esc`/`pat`. + Expr::Between { high, .. } => prec::LIKE.min(right_edge(high)), + Expr::Like { + pattern, escape, .. + } => { + let rightmost = escape.as_deref().unwrap_or_else(|| pattern.as_ref()); + prec::LIKE.min(right_edge(rightmost)) + } + // Everything else closes on the right (a bracket, a keyword, a literal, + // or `IS NULL`-style), so nothing binds into it. + _ => prec::ATOM, + } +} + +/// The loosest precedence exposed on `expr`'s *left spine*, the mirror of +/// [`right_edge`]. For a *right* operand (an operator on its left), this is what +/// decides parenthesization: a left-associative operator printed to its left +/// reaches into the left spine and re-associates if that spine exposes a +/// precedence at or below the operator's. The top operator alone is not enough, +/// because a left-nested chain can bury a looser operator down its left edge: +/// `387 = ANY (...) LIKE a IN (...)` has a top `IN` (`Like`) but exposes the +/// `= ANY` (`Cmp`) on its left, so a tighter `<>` to its left +/// (`48 <> 387 = ANY (...) ...`) would steal the `<>` into the `= ANY`'s left +/// rather than leave it as the `<>`'s right operand. Forms that open with their +/// own token on the left (a prefix operator, a keyword, `(…)`, a literal) are +/// `ATOM`. +fn left_edge(expr: &Expr) -> u8 { + match expr { + // Left-transparent infixes / postfix-keyword constructs: the subject (or + // left operand) sits on the left spine, so descend into it. + Expr::Or { left, .. } => prec::OR.min(left_edge(left)), + Expr::And { left, .. } => prec::AND.min(left_edge(left)), + Expr::Op { + op, + expr1, + expr2: Some(_), + } => binary_op_precedence(op).min(left_edge(expr1)), + Expr::IsExpr { expr, .. } => prec::IS.min(left_edge(expr)), + Expr::AnyExpr { left, .. } + | Expr::AllExpr { left, .. } + | Expr::AnySubquery { left, .. } + | Expr::AllSubquery { left, .. } => prec::CMP.min(left_edge(left)), + Expr::Like { expr, .. } + | Expr::Between { expr, .. } + | Expr::InList { expr, .. } + | Expr::InSubquery { expr, .. } => prec::LIKE.min(left_edge(expr)), + // Everything else leads with its own token on the left: a prefix + // operator (`-`/`+`/`~`/`NOT`), a keyword, `(…)`, `ARRAY[…]`, a literal, + // or a `COLLATE`/`::`/`[…]` whose own operand the printer parenthesizes + // when it isn't self-delimiting. Nothing to the left binds into it. + _ => prec::ATOM, + } +} + +/// Write `operand` for a binary operator, parenthesizing it iff `needs_parens`. +fn write_binary_operand( + f: &mut AstFormatter, + operand: &Expr, + needs_parens: bool, +) { + if needs_parens { + f.write_str("("); + f.write_node(operand); + f.write_str(")"); + } else { + f.write_node(operand); + } +} + /// Whether `expr` prints in a *self-delimiting* form — atomic, or wrapped in its /// own brackets/parens (`name(...)`, `(…)`, `ARRAY[…]`, `CASE … END`, …) — so it /// is safe to print immediately to the left of a tight postfix operator (`::`, @@ -661,14 +906,45 @@ fn prefix_operand_needs_parens(operand: &Expr) -> bool { /// regular subscript. Parenthesize identifiers whose last component is a /// context-sensitive keyword so the round trip stays an identifier subscript. fn write_subscript_receiver(f: &mut AstFormatter, expr: &Expr) { - let needs_parens = if let Expr::Identifier(idents) = expr { - idents + let needs_parens = match expr { + // A bare keyword identifier (`map`, `list`, …) dispatches to the + // map/list-literal grammar before `[`, so it needs parens even though + // identifiers are otherwise safe receivers. + Expr::Identifier(idents) => idents .last() .and_then(|id| id.as_keyword()) .map(|kw| kw.is_context_sensitive_keyword()) - .unwrap_or(false) - } else { - false + .unwrap_or(false), + // Self-delimiting primaries, the bracketed collections, and the postfix + // forms that end in an identifier or `)` are safe: a following `[…]` + // attaches to the whole receiver as a fresh subscript. + Expr::QualifiedWildcard(_) + | Expr::Parameter(_) + | Expr::Value(_) + | Expr::Function(_) + | Expr::HomogenizingFunction { .. } + | Expr::NullIf { .. } + | Expr::Nested(_) + | Expr::Subquery(_) + | Expr::Exists(_) + | Expr::Case { .. } + | Expr::Row { .. } + | Expr::Array(_) + | Expr::ArraySubquery(_) + | Expr::List(_) + | Expr::ListSubquery(_) + | Expr::Map(_) + | Expr::MapSubquery(_) + | Expr::FieldAccess { .. } + | Expr::WildcardAccess(_) + | Expr::Collate { .. } => false, + // `Cast`: the type parser swallows a following `[…]` as an array suffix + // (`a::int4[1]` is `a` cast to `int4[]`, not a subscript of `a::int4`). + // `Subscript`: consecutive `[…]` flatten into one node (`a[1][2]` is a + // single subscript), so a nested subscript receiver must be parenthesized + // to stay nested. Everything else (operators, `IS`/`LIKE`/… constructs) + // binds looser than `[` and would re-associate, so parenthesize by default. + _ => true, }; if needs_parens { f.write_str("("); diff --git a/src/sql-parser/src/parser.rs b/src/sql-parser/src/parser.rs index 7d9d1008b314f..e15bc33ae1413 100644 --- a/src/sql-parser/src/parser.rs +++ b/src/sql-parser/src/parser.rs @@ -343,8 +343,12 @@ const SPECULATIVE_FAILURES_PER_TOKEN: usize = 100; /// Defines a number of precedence classes operators follow. Since this enum derives Ord, the /// precedence classes are ordered from weakest binding at the top to tightest binding at the /// bottom. +/// +/// The expression printer's `ast::defs::expr::prec` ranks are derived from this +/// ladder via `as u8`, so this enum is the single source of truth for output +/// parenthesization precedence too. Keep the ordering authoritative. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] -enum Precedence { +pub(crate) enum Precedence { Zero, Or, And, diff --git a/src/sql-parser/tests/sqlparser_common.rs b/src/sql-parser/tests/sqlparser_common.rs index 113c089d7d7c8..01a875fe5f915 100644 --- a/src/sql-parser/tests/sqlparser_common.rs +++ b/src/sql-parser/tests/sqlparser_common.rs @@ -740,3 +740,397 @@ fn test_set_operation_leading_show_display_roundtrip() { // parse_pretty_roundtrip `(SHOW … EXCEPT SELECT …)` finding. assert_display_roundtrips("(SHOW foo EXCEPT SELECT 1)"); } + +#[mz_ore::test] +fn cast_operand_reparenthesized_after_nested_stripped() { + use mz_sql_parser::ast::display::AstDisplay; + use mz_sql_parser::ast::visit_mut::{self, VisitMut}; + use mz_sql_parser::ast::{AstInfo, Expr}; + + // Strips `Expr::Nested`, mimicking an AST transform (or the fuzz oracle's + // `normalize`) that drops the parser's protective parentheses. + struct StripNested; + impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested { + fn visit_expr_mut(&mut self, e: &'a mut Expr) { + visit_mut::visit_expr_mut(self, e); + if let Expr::Nested(inner) = e { + *e = (**inner).clone(); + } + } + } + + // `CAST(-0 AS int4)` parses to `Cast(Nested(- 0))`. The unary minus is not + // self-delimiting, so the printer must re-add parens once the `Nested` is + // gone. Otherwise it prints `- 0::int4`, which reparses as `- (0::int4)` + // (a structurally different, semantically different expression). + let mut ast = mz_sql_parser::parser::parse_statements("SELECT CAST(-0 AS int4) + a") + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut ast); + let displayed = ast.to_ast_string_simple(); + let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut reparsed); + assert_eq!( + ast, reparsed, + "Cast display did not round-trip after Nested was stripped; displayed = {displayed:?}" + ); +} + +#[mz_ore::test] +fn between_bound_reparenthesized_after_nested_stripped() { + use mz_sql_parser::ast::display::AstDisplay; + use mz_sql_parser::ast::visit_mut::{self, VisitMut}; + use mz_sql_parser::ast::{AstInfo, Expr}; + + struct StripNested; + impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested { + fn visit_expr_mut(&mut self, e: &'a mut Expr) { + visit_mut::visit_expr_mut(self, e); + if let Expr::Nested(inner) = e { + *e = (**inner).clone(); + } + } + } + + // A `BETWEEN` bound parses with `parse_subexpr(Precedence::Like)`, so a bound + // whose left spine binds at or below `Like` is wrapped in `Expr::Nested` by + // the parser. Once the `Nested` is stripped, the printer must re-add the + // parens, or the bound's leading operator re-associates out of the `BETWEEN` + // (`... AND 0 = a` reparses as `(... BETWEEN ... AND 0) = a`, and + // `... 1 IS NULL AND ...` makes the parser expect `AND` but find `IS`). The + // right-closing forms (`IS NULL`, `= ANY (…)`, `IN (…)`) are the interesting + // cases: their looseness is on the *left* spine, so the decision must use the + // left edge, not the right edge (which closes at `ATOM`). + for sql in [ + "SELECT false BETWEEN x AND (0 = a)", + "SELECT y BETWEEN (1 IS NULL) AND z", + "SELECT y BETWEEN x AND (1 IS NULL)", + "SELECT y BETWEEN (a = ANY (ARRAY[b])) AND z", + "SELECT y BETWEEN (a IN (SELECT c FROM t)) AND z", + "SELECT y BETWEEN (a OR b) AND z", + // The left-spine operator is buried under a tighter top (`IN`, `Like`). + "SELECT y BETWEEN (a = ANY (ARRAY[b]) IN (SELECT c FROM t)) AND z", + ] { + let mut ast = mz_sql_parser::parser::parse_statements(sql) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut ast); + let displayed = ast.to_ast_string_simple(); + let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut reparsed); + assert_eq!( + ast, reparsed, + "BETWEEN bound display did not round-trip after Nested was stripped: {sql:?} -> {displayed:?}" + ); + } +} + +#[mz_ore::test] +fn binary_op_operand_reparenthesized_after_nested_stripped() { + use mz_sql_parser::ast::display::AstDisplay; + use mz_sql_parser::ast::visit_mut::{self, VisitMut}; + use mz_sql_parser::ast::{AstInfo, Expr}; + + struct StripNested; + impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested { + fn visit_expr_mut(&mut self, e: &'a mut Expr) { + visit_mut::visit_expr_mut(self, e); + if let Expr::Nested(inner) = e { + *e = (**inner).clone(); + } + } + } + + // A binary operator must parenthesize an operand that re-associates on + // reparse once the parser's protective `Expr::Nested` is stripped. The left + // operand is decided by its *right* edge (the operator reaches into its right + // spine), the right operand by its *left* edge. An operand's top operator + // is not enough, because a left-/right-nested chain can bury a looser operator + // down the spine. `(a + b) * c` -> `Op(*, Op(+), c)` must still print + // `(a + b) * c`, not `a + b * c`. + for sql in [ + "SELECT (a + b) * c", + "SELECT (a OR b) AND c", + "SELECT (NOT a IS DISTINCT FROM b) + c", + "SELECT (a OR b) IN (SELECT c FROM bar)", + "SELECT x LIKE (b IN (SELECT c FROM bar))", + // Prefix `~` binds looser than `+`/`*`, so it needs parens as their operand. + "SELECT (~ a) + b", + "SELECT (~ a) * b", + // A prefix op is right-transparent: the quantified `= ANY` would bind into + // the `NOT` unless the whole left is parenthesized. + "SELECT (- NOT a IN (b)) = ANY (SELECT c FROM t)", + // The right operand's *top* operator (`IN`, binds tighter than `<>`) hides + // a looser `= ANY` (`Cmp`, equal to `<>`) on its left spine. Without + // parens the `<>` would re-associate into that `= ANY`'s left, so the + // right operand must be parenthesized by its left edge, not its top. + "SELECT a <> (b = ANY (ARRAY[c]) IN (SELECT d FROM t))", + // The quantified `` is an ordinary binary infix and can bind *tighter* + // than its left's right spine: `*` (`MultiplyDivide`) over a `+` + // (`PlusMinus`) left must keep the parens, or `(a + b) * ANY (...)` prints + // `a + b * ANY (...)` and reparses as `a + (b * ANY (...))`. Decided by the + // left's right edge vs ``'s own precedence, not a fixed `Like` cutoff. + "SELECT (a + b) * ANY (ARRAY[c])", + "SELECT (a + b) * ANY (SELECT c FROM t)", + "SELECT (a - b) / ALL (ARRAY[c])", + "SELECT (a - b) % ALL (SELECT c FROM t)", + // Prefix `~` binds at `Other`, looser than `*`, so it is right-transparent + // for a tighter quantified `*`: `~ a * ANY (...)` would bind the `* ANY` + // into the `~`'s operand without the parens. + "SELECT (~ a) * ANY (ARRAY[c])", + ] { + let mut ast = mz_sql_parser::parser::parse_statements(sql) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut ast); + let displayed = ast.to_ast_string_simple(); + let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut reparsed); + assert_eq!( + ast, reparsed, + "binary-op display did not round-trip after Nested was stripped: {sql:?} -> {displayed:?}" + ); + } +} + +#[mz_ore::test] +#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux` +fn quantified_left_minimal_parens_after_nested_stripped() { + use mz_sql_parser::ast::display::AstDisplay; + use mz_sql_parser::ast::visit_mut::{self, VisitMut}; + use mz_sql_parser::ast::{AstInfo, Expr}; + + struct StripNested; + impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested { + fn visit_expr_mut(&mut self, e: &'a mut Expr) { + visit_mut::visit_expr_mut(self, e); + if let Expr::Nested(inner) = e { + *e = (**inner).clone(); + } + } + } + + // The quantified `` binds at its own precedence, so a left whose right + // edge is *equal to or tighter than* `` needs no parens. The old fixed + // `Like` threshold wrapped every `Cmp`/`Like`-edged left. Assert the minimal + // form now prints bare (and still round-trips). The `(left)` in each input is + // the parser's protective `Nested`, which we strip so `write_quantified_left` + // alone decides the parens. + for (sql, expected) in [ + // Equal precedence on the left's right edge: the left's own `=` (`Cmp`) + // closes before the quantified `=` (`Cmp`), left-associatively. + ( + "SELECT (a = b) = ANY (ARRAY[c])", + "SELECT a = b = ANY (ARRAY[c])", + ), + // Tighter left edge (`Like` > `Cmp`) under a looser quantified `=`. + ( + "SELECT (a LIKE b) = ANY (ARRAY[c])", + "SELECT a LIKE b = ANY (ARRAY[c])", + ), + // Much tighter arithmetic left under a looser quantified `=`. + ( + "SELECT (a + b) = ANY (ARRAY[c])", + "SELECT a + b = ANY (ARRAY[c])", + ), + ] { + let mut ast = mz_sql_parser::parser::parse_statements(sql) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut ast); + let displayed = ast.to_ast_string_simple(); + assert_eq!( + displayed, expected, + "quantified-left over-parenthesized after Nested was stripped: {sql:?}" + ); + // The bare form must still reparse to the same AST. + let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut reparsed); + assert_eq!( + ast, reparsed, + "minimal quantified-left did not round-trip: {sql:?}" + ); + } +} + +#[mz_ore::test] +fn is_distinct_from_rhs_reparenthesized_after_nested_stripped() { + use mz_sql_parser::ast::display::AstDisplay; + use mz_sql_parser::ast::visit_mut::{self, VisitMut}; + use mz_sql_parser::ast::{AstInfo, Expr}; + + struct StripNested; + impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested { + fn visit_expr_mut(&mut self, e: &'a mut Expr) { + visit_mut::visit_expr_mut(self, e); + if let Expr::Nested(inner) = e { + *e = (**inner).clone(); + } + } + } + + // `IS DISTINCT FROM` parses its right-hand side at the `IS` precedence (see + // `Parser::parse_is`), so a RHS whose left spine binds at or below `IS` + // (`OR`/`AND`/`IS`) is wrapped in `Expr::Nested`. Once stripped, the printer + // must re-add the parens. Otherwise `a IS DISTINCT FROM b OR c` reparses as + // `(a IS DISTINCT FROM b) OR c`. This drift is invisible to a stable-string + // round trip (both ASTs print to the same string), so it is checked + // structurally here. + for sql in [ + "SELECT a IS DISTINCT FROM (b OR c)", + "SELECT a IS DISTINCT FROM (b AND c)", + "SELECT a IS DISTINCT FROM (b IS DISTINCT FROM c)", + "SELECT a IS DISTINCT FROM (b IS NULL)", + // A comparison RHS binds tighter than `IS`, so it stays bare. + "SELECT a IS DISTINCT FROM b = c", + ] { + let mut ast = mz_sql_parser::parser::parse_statements(sql) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut ast); + let displayed = ast.to_ast_string_simple(); + let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut reparsed); + assert_eq!( + ast, reparsed, + "IS DISTINCT FROM RHS display did not round-trip after Nested was stripped: {sql:?} -> {displayed:?}" + ); + } +} + +#[mz_ore::test] +fn like_pattern_reparenthesized_after_nested_stripped() { + use mz_sql_parser::ast::display::AstDisplay; + use mz_sql_parser::ast::visit_mut::{self, VisitMut}; + use mz_sql_parser::ast::{AstInfo, Expr}; + + struct StripNested; + impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested { + fn visit_expr_mut(&mut self, e: &'a mut Expr) { + visit_mut::visit_expr_mut(self, e); + if let Expr::Nested(inner) = e { + *e = (**inner).clone(); + } + } + } + + // A `[I]LIKE` pattern parses at `Like` precedence. When an `ESCAPE` follows, + // the pattern is immediately left of the `ESCAPE` keyword, so a `[I]LIKE` + // exposed on the pattern's *right* spine steals the `ESCAPE` as its own: + // `x LIKE NOT (a LIKE b) ESCAPE c`, printed bare as + // `x LIKE NOT a LIKE b ESCAPE c`, binds the escape to the inner `a LIKE b`. + // The drift is structural and produces colliding stable strings, so it is + // checked structurally. + for sql in [ + "SELECT x LIKE (NOT (a LIKE b)) ESCAPE c", + "SELECT x LIKE (a LIKE b) ESCAPE c", + "SELECT x ILIKE (a ILIKE b) ESCAPE c", + // Left-spine cases (covered with or without escape). + "SELECT x LIKE (a IN (SELECT b FROM t))", + // No escape: a right-spine LIKE is harmless, so this stays bare. + "SELECT x LIKE NOT (a LIKE b)", + ] { + let mut ast = mz_sql_parser::parser::parse_statements(sql) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut ast); + let displayed = ast.to_ast_string_simple(); + let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut reparsed); + assert_eq!( + ast, reparsed, + "LIKE pattern display did not round-trip after Nested was stripped: {sql:?} -> {displayed:?}" + ); + } +} + +#[mz_ore::test] +fn postfix_access_receiver_reparenthesized_after_nested_stripped() { + use mz_sql_parser::ast::display::AstDisplay; + use mz_sql_parser::ast::visit_mut::{self, VisitMut}; + use mz_sql_parser::ast::{AstInfo, Expr}; + + struct StripNested; + impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested { + fn visit_expr_mut(&mut self, e: &'a mut Expr) { + visit_mut::visit_expr_mut(self, e); + if let Expr::Nested(inner) = e { + *e = (**inner).clone(); + } + } + } + + // The receivers of the postfix `.field`/`.*` and `[…]` operators must stay + // self-delimiting once `Expr::Nested` is stripped, or the trailing token + // re-binds. A bare-name `FieldAccess`/`WildcardAccess` receiver prints as a + // dotted name and collides with a qualified identifier (`(a).b` -> `a.b` is + // `Identifier([a, b])`). A `Cast` subscript receiver lets the type parser eat + // the `[…]` as an array suffix (`(a::int4)[1]` -> `a::int4[1]` is `a::int4[]`). + // A nested `Subscript` receiver flattens (`(a[1])[2]` -> `a[1][2]` is one + // two-position subscript). + for sql in [ + // dot receiver + "SELECT (a).b", + "SELECT (a).*", + "SELECT ((a).b).c", + "SELECT ((a).*).*", + "SELECT (a).b[1]", + // subscript receiver + "SELECT (a::int4)[1]", + "SELECT (CAST(a AS int4))[1]", + "SELECT (a[1])[2]", + "SELECT (a + b)[1]", + // quantified-subquery dot receiver: the trailing `(query)` is a sub-part, + // so `.x`/`.*` would bind to it rather than the whole quantified expr. + "SELECT (a = ANY (SELECT b FROM t)).c", + "SELECT (a = ANY (SELECT b FROM t)).*", + "SELECT (a = ALL (SELECT b FROM t)).c", + "SELECT x BETWEEN (0 = ANY (SELECT b FROM t)).c AND y", + // these are parser-shaped and must remain stable (no spurious parens) + "SELECT (x).a.b", + "SELECT a.b[1]", + "SELECT a[1][2]", + "SELECT f(x)[1]", + "SELECT (a + b).c", + ] { + let mut ast = mz_sql_parser::parser::parse_statements(sql) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut ast); + let displayed = ast.to_ast_string_simple(); + let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed) + .unwrap() + .remove(0) + .ast; + StripNested.visit_statement_mut(&mut reparsed); + assert_eq!( + ast, reparsed, + "postfix-access receiver display did not round-trip after Nested was stripped: {sql:?} -> {displayed:?}" + ); + } +}