Skip to content

Commit 083c4fe

Browse files
committed
Address reviewer comments
1 parent ab477e9 commit 083c4fe

3 files changed

Lines changed: 171 additions & 75 deletions

File tree

src/sql-parser/src/ast/defs/expr.rs

Lines changed: 81 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ impl<T: AstInfo> AstDisplay for Expr<T> {
222222
f.write_str("NOT ");
223223
// `NOT` binds tighter than `AND`/`OR`, so an operand exposing a
224224
// looser operator on its left spine (`NOT (a OR b)`) must keep its
225-
// parens once `Nested` is stripped use `left_edge`, since the
225+
// parens once `Nested` is stripped. We use `left_edge`, since the
226226
// `NOT` sits to the operand's left.
227227
write_binary_operand(f, expr, left_edge(expr) < prec::NOT);
228228
}
@@ -305,7 +305,7 @@ impl<T: AstInfo> AstDisplay for Expr<T> {
305305
// The pattern and escape parse at `Like` precedence and sit to the
306306
// right of the keyword, so an operand exposing a precedence at or
307307
// below `Like` on its left spine (e.g. an `IN`/`LIKE`/`BETWEEN` at
308-
// equal precedence) re-associates unless parenthesized
308+
// equal precedence) re-associates unless parenthesized.
309309
// `a LIKE b IN (q)` parses as `(a LIKE b) IN (q)`. When an `ESCAPE`
310310
// follows, the pattern is *also* immediately left of `ESCAPE`: a
311311
// `[I]LIKE` exposed on the pattern's right spine would steal the
@@ -326,7 +326,7 @@ impl<T: AstInfo> AstDisplay for Expr<T> {
326326
high,
327327
} => {
328328
// The subject is the LHS of the `BETWEEN` infix (parsed at
329-
// `Like`); a spine exposed at or below `Like` on its right would
329+
// `Like`). A spine exposed at or below `Like` on its right would
330330
// pull `BETWEEN` inside it (`a OR b BETWEEN …` is `a OR (b BETWEEN
331331
// …)`), so parenthesize via `right_edge`. Parser ASTs wrap such a
332332
// subject in `Nested` (`ATOM`), so they're unaffected.
@@ -341,18 +341,12 @@ impl<T: AstInfo> AstDisplay for Expr<T> {
341341
}
342342
Expr::Op { op, expr1, expr2 } => {
343343
if let Some(expr2) = expr2 {
344-
// Binary operators are left-associative, so an operand that
345-
// re-associates on reparse must be parenthesized. The left
346-
// operand needs parens when the operator (printed to its
347-
// right) reaches into its right spine at a strictly looser
348-
// precedence (`right_edge`); the right operand needs parens
349-
// when the operator (printed to its left) reaches into its
350-
// left spine at an equal-or-looser precedence (`left_edge`,
351-
// `<=` because equal precedence re-associates left, so
352-
// `a - (b - c)` must keep its parens). The parser wraps such
353-
// operands in `Expr::Nested` (which ranks `ATOM` on both
354-
// edges, so it never re-parenthesizes), leaving parser-produced
355-
// ASTs unchanged; this re-adds parens for ASTs that lost them.
344+
// Binary operators are left-associative: parenthesize an
345+
// operand that would re-associate once `Nested` is stripped —
346+
// the left by its `right_edge` (strictly looser than `op`), the
347+
// right by its `left_edge` (equal-or-looser, as equal
348+
// re-associates left). See those helpers for the spine
349+
// reasoning.
356350
let p = binary_op_precedence(op);
357351
write_binary_operand(f, expr1, right_edge(expr1) < p);
358352
f.write_str(" ");
@@ -378,12 +372,12 @@ impl<T: AstInfo> AstDisplay for Expr<T> {
378372
}
379373
Expr::Cast { expr, data_type } => {
380374
// `::` binds very tightly, so a non-self-delimiting operand must
381-
// be parenthesized or the cast re-associates into its spine
375+
// be parenthesized or the cast re-associates into its spine.
382376
// `CAST(-0 AS int4)` (i.e. `Cast(- 0)`) would otherwise print as
383377
// `- 0::int4` and reparse as `- (0::int4)`. The parser wraps such
384378
// operands in `Expr::Nested`, but `normalize` strips those, so the
385-
// printer must re-add them (mirrors the `Collate` arm; `Nested` is
386-
// itself self-delimiting, so parser-produced ASTs don't double up).
379+
// printer must re-add them (mirroring the `Collate` arm). `Nested`
380+
// is itself self-delimiting, so parser-produced ASTs don't double up.
387381
if prints_self_delimiting(expr) {
388382
f.write_node(&expr);
389383
} else {
@@ -472,31 +466,31 @@ impl<T: AstInfo> AstDisplay for Expr<T> {
472466
f.write_str(")");
473467
}
474468
Expr::AnySubquery { left, op, right } => {
475-
write_quantified_left(f, left);
469+
write_quantified_left(f, left, op);
476470
f.write_str(" ");
477471
f.write_str(op);
478472
f.write_str(" ANY (");
479473
f.write_node(&right);
480474
f.write_str(")");
481475
}
482476
Expr::AnyExpr { left, op, right } => {
483-
write_quantified_left(f, left);
477+
write_quantified_left(f, left, op);
484478
f.write_str(" ");
485479
f.write_str(op);
486480
f.write_str(" ANY (");
487481
f.write_node(&right);
488482
f.write_str(")");
489483
}
490484
Expr::AllSubquery { left, op, right } => {
491-
write_quantified_left(f, left);
485+
write_quantified_left(f, left, op);
492486
f.write_str(" ");
493487
f.write_str(op);
494488
f.write_str(" ALL (");
495489
f.write_node(&right);
496490
f.write_str(")");
497491
}
498492
Expr::AllExpr { left, op, right } => {
499-
write_quantified_left(f, left);
493+
write_quantified_left(f, left, op);
500494
f.write_str(" ");
501495
f.write_str(op);
502496
f.write_str(" ALL (");
@@ -562,13 +556,13 @@ impl_display_t!(Expr);
562556
/// number `1.` and leaves `x` as an alias, and `'a'::T.x` consumes `T.x` as a
563557
/// qualified type name. The whitelist below covers receivers that print as
564558
/// self-terminating syntax (parenthesized exprs, function calls, bracketed
565-
/// collections, etc.); anything else gets explicit parens.
559+
/// collections, etc.). Anything else gets explicit parens.
566560
///
567561
/// A bare `Identifier`/`QualifiedWildcard` receiver is *not* safe: `a` then
568562
/// `.b`/`.*` prints as `a.b`/`a.*`, which reparses as the qualified identifier
569563
/// `Identifier([a, b])` / `QualifiedWildcard([a])` rather than a field/wildcard
570564
/// access. The parser only ever builds those accesses over a parenthesized
571-
/// receiver (`(a).b`), so it wraps the name in `Expr::Nested`; a bare name here
565+
/// receiver (`(a).b`), so it wraps the name in `Expr::Nested`. A bare name here
572566
/// is a `Nested`-stripped AST and must be re-parenthesized. A `FieldAccess` /
573567
/// `WildcardAccess` receiver *is* safe, because its own printing already
574568
/// parenthesizes a bare-name base (`(a).b.c`), so the chain stays self-delimiting.
@@ -617,15 +611,27 @@ fn write_dot_receiver<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, expr:
617611
}
618612
}
619613

620-
/// Write `left` as the LHS of `<left> <op> ANY/ALL (...)`. The parser parses the
621-
/// left at `Cmp`, and the `<op>` printed to its right is a binary infix that will
622-
/// reach into any spine exposed at or below `Like` and reparse it as part of the
623-
/// quantified expression's left rather than as a wrapper around it. Use
624-
/// [`right_edge`] so a low-precedence spine hidden under right-transparent
625-
/// prefixes (`- NOT a IN (b) = ANY (...)`, which exposes the `NOT`'s `IN`) is
626-
/// caught; a normal tight infix `Op` prints bare.
627-
fn write_quantified_left<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, expr: &Expr<T>) {
628-
let needs_parens = right_edge(expr) <= prec::LIKE;
614+
/// Write `left` as the LHS of `<left> <op> ANY/ALL (...)`. The printed `<op>` is an
615+
/// ordinary binary infix. It can be any operator the parser accepts here, from
616+
/// `=`/`<` (`Cmp`) all the way down to `*`/`/`/`%` (`MultiplyDivide`), and on
617+
/// reparse it binds into any operator exposed on `left`'s right spine that is
618+
/// *strictly looser* than `<op>` itself, stealing that suffix into the quantified
619+
/// expression's left rather than wrapping the whole `left`. So parenthesize
620+
/// exactly when `left`'s [`right_edge`] binds looser than `<op>`'s own precedence
621+
/// ([`binary_op_precedence`]), mirroring the binary-`Op` arm. Using the operator's
622+
/// real precedence (not a fixed `Like` threshold) both parenthesizes a
623+
/// tighter-binding `<op>` over a looser left and leaves an equal-or-tighter left
624+
/// bare (`a = b = ANY (…)`, `a LIKE b = ANY (…)`), which the old fixed threshold
625+
/// over-parenthesized. The tighter-binding case, `(a + b) * ANY (…)`, would
626+
/// otherwise print `a + b * ANY (…)` and reparse as the different
627+
/// `a + (b * ANY (…))`. [`right_edge`] also sees a looser spine hidden under
628+
/// right-transparent prefixes, e.g. the `NOT`'s `IN` in `- NOT a IN (b) = ANY (…)`.
629+
fn write_quantified_left<W: fmt::Write, T: AstInfo>(
630+
f: &mut AstFormatter<W>,
631+
expr: &Expr<T>,
632+
op: &Op,
633+
) {
634+
let needs_parens = right_edge(expr) < binary_op_precedence(op);
629635
if needs_parens {
630636
f.write_str("(");
631637
f.write_node(expr);
@@ -638,13 +644,13 @@ fn write_quantified_left<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, exp
638644
/// Write `bound` as a `BETWEEN … AND …` bound. The parser parses both bounds with
639645
/// `parse_subexpr(Precedence::Like)` (see `Parser::parse_between`), starting fresh
640646
/// with nothing to the bound's left, so it walks the bound's *left spine* and
641-
/// stops at the first operator binding at or below `Like` leaving that operator
647+
/// stops at the first operator binding at or below `Like`, leaving that operator
642648
/// outside the bound (`x BETWEEN 1 IS NULL AND y` parses `1` as the bound, then
643649
/// expects `AND` but finds `IS`). A bound is therefore safe bare only when its
644-
/// left edge binds strictly above `Like`; use [`left_edge`] (not [`right_edge`],
650+
/// left edge binds strictly above `Like`. Use [`left_edge`] (not [`right_edge`],
645651
/// which closes at `ATOM` for the right-closing `IS NULL`/`= ANY (…)`/`IN (…)`
646652
/// forms whose looseness is on the left). The parser wraps these bounds in
647-
/// `Expr::Nested` (which is `ATOM`, so it prints bare); this re-adds the parens
653+
/// `Expr::Nested` (which is `ATOM`, so it prints bare). This re-adds the parens
648654
/// for ASTs where that wrapper is absent.
649655
fn write_between_bound<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, bound: &Expr<T>) {
650656
let needs_parens = left_edge(bound) <= prec::LIKE;
@@ -657,32 +663,41 @@ fn write_between_bound<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, bound
657663
}
658664
}
659665

660-
/// Output-precedence ranks, ordered like `Parser::get_next_precedence` /
661-
/// `Parser::Precedence` (higher binds tighter). They classify the *top* operator
662-
/// an expr prints with, so the binary-operator printer can parenthesize an
663-
/// operand that would otherwise re-associate on reparse. Keep in sync with the
664-
/// parser. `ATOM` covers the self-delimiting primaries — they never need parens.
666+
/// Output-precedence ranks, derived directly from the parser's [`Precedence`]
667+
/// ladder (higher binds tighter) so it stays the single source of truth:
668+
/// reordering or inserting a parser level reranks these automatically, and only
669+
/// the variant each rank maps to is maintained by hand. They classify the *top*
670+
/// operator an expr prints with, so the binary-operator printer can parenthesize
671+
/// an operand that would otherwise re-associate on reparse. `ATOM`, the one rank
672+
/// with no parser counterpart, is layered one above the tightest parser level to
673+
/// mark the self-delimiting primaries; they never need parens.
674+
///
675+
/// [`Precedence`]: crate::parser::Precedence
676+
// `Precedence` is a fieldless enum with a handful of variants, so reading each
677+
// discriminant with `as u8` is exact and lossless.
678+
#[allow(clippy::as_conversions)]
665679
mod prec {
666-
pub const OR: u8 = 1;
667-
pub const AND: u8 = 2;
668-
pub const NOT: u8 = 3;
669-
pub const IS: u8 = 4;
670-
pub const CMP: u8 = 5;
671-
pub const LIKE: u8 = 6;
672-
pub const OTHER: u8 = 7;
673-
pub const PLUS_MINUS: u8 = 8;
674-
pub const MULTIPLY_DIVIDE: u8 = 9;
675-
// The `COLLATE` and postfix (`::`/`[…]`) parser levels (10 and 12) live
676-
// between `MULTIPLY_DIVIDE` and `ATOM`, but neither edge function ever
677-
// *returns* them: those forms are self-delimiting (their own operand is
678-
// parenthesized when it isn't), so both their edges rank `ATOM`. The holes
679-
// are kept so `PREFIX` keeps the parser's relative position.
680+
use crate::parser::Precedence;
681+
682+
pub const OR: u8 = Precedence::Or as u8;
683+
pub const AND: u8 = Precedence::And as u8;
684+
pub const NOT: u8 = Precedence::PrefixNot as u8;
685+
pub const IS: u8 = Precedence::Is as u8;
686+
pub const CMP: u8 = Precedence::Cmp as u8;
687+
pub const LIKE: u8 = Precedence::Like as u8;
688+
pub const OTHER: u8 = Precedence::Other as u8;
689+
pub const PLUS_MINUS: u8 = Precedence::PlusMinus as u8;
690+
pub const MULTIPLY_DIVIDE: u8 = Precedence::MultiplyDivide as u8;
691+
// The `COLLATE` and postfix (`::`/`[…]`) parser levels live between
692+
// `MULTIPLY_DIVIDE` and `ATOM`, but neither edge function ever *returns* them:
693+
// those forms are self-delimiting (their own operand is parenthesized when it
694+
// isn't), so both their edges rank `ATOM`. Kept for parity with the ladder.
680695
#[allow(dead_code)]
681-
pub const COLLATE: u8 = 10;
682-
pub const PREFIX: u8 = 11;
696+
pub const COLLATE: u8 = Precedence::PostfixCollateAt as u8;
697+
pub const PREFIX: u8 = Precedence::PrefixPlusMinus as u8;
683698
#[allow(dead_code)]
684-
pub const POSTFIX: u8 = 12;
685-
pub const ATOM: u8 = 13;
699+
pub const POSTFIX: u8 = Precedence::PostfixSubscriptCast as u8;
700+
pub const ATOM: u8 = Precedence::PostfixSubscriptCast as u8 + 1;
686701
}
687702

688703
/// The precedence of a binary operator, mirroring `Parser::get_next_precedence`.
@@ -702,7 +717,7 @@ fn binary_op_precedence(op: &Op) -> u8 {
702717
/// The precedence at which a prefix operator (`Op` with no second operand)
703718
/// parses its operand, mirroring `Parser::parse_prefix`: `-`/`+` at
704719
/// `PrefixPlusMinus`, but `~` (and namespaced prefixes) at `Other`, so `~ a + b`
705-
/// parses as `~ (a + b)` `~` binds looser than `+`/`-`/`*`.
720+
/// parses as `~ (a + b)`. `~` binds looser than `+`/`-`/`*`.
706721
fn unary_prec(op: &Op) -> u8 {
707722
if op.namespace.is_none() && (op.op == "-" || op.op == "+") {
708723
prec::PREFIX
@@ -711,7 +726,7 @@ fn unary_prec(op: &Op) -> u8 {
711726
}
712727
}
713728

714-
/// The loosest precedence exposed on `expr`'s *right spine* the precedence at
729+
/// The loosest precedence exposed on `expr`'s *right spine*, the precedence at
715730
/// which an operator printed immediately to its right would bind *into* it
716731
/// rather than wrap it. For a left operand / subject of a construct that prints
717732
/// to its right, this is what decides parenthesization (its mirror, [`left_edge`],
@@ -736,12 +751,12 @@ fn right_edge<T: AstInfo>(expr: &Expr<T>) -> u8 {
736751
expr2: None,
737752
} => unary_prec(op).min(right_edge(expr1)),
738753
Expr::Not { expr } => prec::NOT.min(right_edge(expr)),
739-
// `IS DISTINCT FROM x` exposes `x`; `IS NULL`/`TRUE`/… close.
754+
// `IS DISTINCT FROM x` exposes `x`, while `IS NULL`/`TRUE`/… close.
740755
Expr::IsExpr {
741756
construct: IsExprConstruct::DistinctFrom(x),
742757
..
743758
} => prec::IS.min(right_edge(x)),
744-
// `… BETWEEN low AND high` exposes `high`; `… [I]LIKE pat [ESCAPE esc]`
759+
// `… BETWEEN low AND high` exposes `high`. `… [I]LIKE pat [ESCAPE esc]`
745760
// exposes the rightmost of `esc`/`pat`.
746761
Expr::Between { high, .. } => prec::LIKE.min(right_edge(high)),
747762
Expr::Like {
@@ -756,7 +771,7 @@ fn right_edge<T: AstInfo>(expr: &Expr<T>) -> u8 {
756771
}
757772
}
758773

759-
/// The loosest precedence exposed on `expr`'s *left spine* the mirror of
774+
/// The loosest precedence exposed on `expr`'s *left spine*, the mirror of
760775
/// [`right_edge`]. For a *right* operand (an operator on its left), this is what
761776
/// decides parenthesization: a left-associative operator printed to its left
762777
/// reaches into the left spine and re-associates if that spine exposes a
@@ -788,10 +803,10 @@ fn left_edge<T: AstInfo>(expr: &Expr<T>) -> u8 {
788803
| Expr::Between { expr, .. }
789804
| Expr::InList { expr, .. }
790805
| Expr::InSubquery { expr, .. } => prec::LIKE.min(left_edge(expr)),
791-
// Everything else leads with its own token on the left a prefix
806+
// Everything else leads with its own token on the left: a prefix
792807
// operator (`-`/`+`/`~`/`NOT`), a keyword, `(…)`, `ARRAY[…]`, a literal,
793808
// or a `COLLATE`/`::`/`[…]` whose own operand the printer parenthesizes
794-
// when it isn't self-delimiting — so nothing to the left binds into it.
809+
// when it isn't self-delimiting. Nothing to the left binds into it.
795810
_ => prec::ATOM,
796811
}
797812
}

src/sql-parser/src/parser.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,12 @@ const SPECULATIVE_FAILURES_PER_TOKEN: usize = 100;
343343
/// Defines a number of precedence classes operators follow. Since this enum derives Ord, the
344344
/// precedence classes are ordered from weakest binding at the top to tightest binding at the
345345
/// bottom.
346+
///
347+
/// The expression printer's `ast::defs::expr::prec` ranks are derived from this
348+
/// ladder via `as u8`, so this enum is the single source of truth for output
349+
/// parenthesization precedence too — keep the ordering authoritative.
346350
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
347-
enum Precedence {
351+
pub(crate) enum Precedence {
348352
Zero,
349353
Or,
350354
And,

0 commit comments

Comments
 (0)