Skip to content

Commit 96fda3f

Browse files
def-claude
andcommitted
sql: parenthesize postfix-access receivers (. and []) by self-delimitation
The receiver helpers for the postfix `.field`/`.*` and `[…]` operators whitelisted variants that are only ever produced by the parser wrapped in `Expr::Nested`, so a `Nested`-stripped AST (an implicit rewrite, a fuzzed round trip) printed them bare and the trailing token re-bound: * `write_dot_receiver` treated a bare `Identifier`/`QualifiedWildcard` receiver as safe, but `a` followed by `.b`/`.*` prints `a.b`/`a.*`, which reparses as the qualified identifier `Identifier([a, b])` / `QualifiedWildcard([a])` instead of a field/wildcard access. The parser only builds those accesses over a parenthesized receiver, so it always wraps the name in `Nested`. Drop both from the safe set. `FieldAccess` / `WildcardAccess` receivers stay safe: their own printing parenthesizes a bare-name base (`(a).b.c`), so the chain remains self-delimiting and parser-shaped `(x).a.b` does not gain spurious parens. * `write_subscript_receiver` only parenthesized keyword identifiers. A `Cast` receiver is unsafe because the type parser eats the following `[…]` as an array suffix (`(a::int4)[1]` -> `a::int4[1]` is `a` cast to `int4[]`), and a nested `Subscript` receiver is unsafe because consecutive `[…]` flatten into one node (`(a[1])[2]` -> `a[1][2]` is a single two-position subscript). Replace the keyword check with an explicit safe whitelist that parenthesizes `Cast`, nested `Subscript`, and the operator/comparison constructs by default. Both are churn-free: the parser never emits these receivers bare (it eats the `[…]` into the cast type, flattens subscripts, and wraps dotted names in `Nested`), so the parser and pretty-printer datadriven suites are unchanged. Found by extending the round-trip grammar fuzzer to cover subscripts, field access, `COLLATE`, `AT TIME ZONE`, and the `position`/`extract`/`substring` special forms. Tests: adds `postfix_access_receiver_reparenthesized_after_nested_stripped` to `sqlparser_common.rs`. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent e4e1fbf commit 96fda3f

2 files changed

Lines changed: 109 additions & 10 deletions

File tree

src/sql-parser/src/ast/defs/expr.rs

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -561,14 +561,21 @@ impl_display_t!(Expr);
561561
/// the lexer and parser greedily extend adjacent tokens: `1.x` tokenizes the
562562
/// number `1.` and leaves `x` as an alias, and `'a'::T.x` consumes `T.x` as a
563563
/// qualified type name. The whitelist below covers receivers that print as
564-
/// self-terminating syntax (identifiers, parenthesized exprs, function calls,
565-
/// bracketed collections, etc.); anything else gets explicit parens.
564+
/// self-terminating syntax (parenthesized exprs, function calls, bracketed
565+
/// collections, etc.); anything else gets explicit parens.
566+
///
567+
/// A bare `Identifier`/`QualifiedWildcard` receiver is *not* safe: `a` then
568+
/// `.b`/`.*` prints as `a.b`/`a.*`, which reparses as the qualified identifier
569+
/// `Identifier([a, b])` / `QualifiedWildcard([a])` rather than a field/wildcard
570+
/// access. The parser only ever builds those accesses over a parenthesized
571+
/// receiver (`(a).b`), so it wraps the name in `Expr::Nested`; a bare name here
572+
/// is a `Nested`-stripped AST and must be re-parenthesized. A `FieldAccess` /
573+
/// `WildcardAccess` receiver *is* safe, because its own printing already
574+
/// parenthesizes a bare-name base (`(a).b.c`), so the chain stays self-delimiting.
566575
fn write_dot_receiver<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, expr: &Expr<T>) {
567576
let safe = matches!(
568577
expr,
569-
Expr::Identifier(_)
570-
| Expr::QualifiedWildcard(_)
571-
| Expr::FieldAccess { .. }
578+
Expr::FieldAccess { .. }
572579
| Expr::WildcardAccess(_)
573580
| Expr::Parameter(_)
574581
| Expr::Nested(_)
@@ -879,14 +886,45 @@ fn prefix_operand_needs_parens<T: AstInfo>(operand: &Expr<T>) -> bool {
879886
/// regular subscript. Parenthesize identifiers whose last component is a
880887
/// context-sensitive keyword so the round trip stays an identifier subscript.
881888
fn write_subscript_receiver<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, expr: &Expr<T>) {
882-
let needs_parens = if let Expr::Identifier(idents) = expr {
883-
idents
889+
let needs_parens = match expr {
890+
// A bare keyword identifier (`map`, `list`, …) dispatches to the
891+
// map/list-literal grammar before `[`, so it needs parens even though
892+
// identifiers are otherwise safe receivers.
893+
Expr::Identifier(idents) => idents
884894
.last()
885895
.and_then(|id| id.as_keyword())
886896
.map(|kw| kw.is_context_sensitive_keyword())
887-
.unwrap_or(false)
888-
} else {
889-
false
897+
.unwrap_or(false),
898+
// Self-delimiting primaries, the bracketed collections, and the postfix
899+
// forms that end in an identifier or `)` are safe: a following `[…]`
900+
// attaches to the whole receiver as a fresh subscript.
901+
Expr::QualifiedWildcard(_)
902+
| Expr::Parameter(_)
903+
| Expr::Value(_)
904+
| Expr::Function(_)
905+
| Expr::HomogenizingFunction { .. }
906+
| Expr::NullIf { .. }
907+
| Expr::Nested(_)
908+
| Expr::Subquery(_)
909+
| Expr::Exists(_)
910+
| Expr::Case { .. }
911+
| Expr::Row { .. }
912+
| Expr::Array(_)
913+
| Expr::ArraySubquery(_)
914+
| Expr::List(_)
915+
| Expr::ListSubquery(_)
916+
| Expr::Map(_)
917+
| Expr::MapSubquery(_)
918+
| Expr::FieldAccess { .. }
919+
| Expr::WildcardAccess(_)
920+
| Expr::Collate { .. } => false,
921+
// `Cast`: the type parser swallows a following `[…]` as an array suffix
922+
// (`a::int4[1]` is `a` cast to `int4[]`, not a subscript of `a::int4`).
923+
// `Subscript`: consecutive `[…]` flatten into one node (`a[1][2]` is a
924+
// single subscript), so a nested subscript receiver must be parenthesized
925+
// to stay nested. Everything else (operators, `IS`/`LIKE`/… constructs)
926+
// binds looser than `[` and would re-associate, so parenthesize by default.
927+
_ => true,
890928
};
891929
if needs_parens {
892930
f.write_str("(");

src/sql-parser/tests/sqlparser_common.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,3 +990,64 @@ fn like_pattern_reparenthesized_after_nested_stripped() {
990990
);
991991
}
992992
}
993+
994+
#[mz_ore::test]
995+
fn postfix_access_receiver_reparenthesized_after_nested_stripped() {
996+
use mz_sql_parser::ast::display::AstDisplay;
997+
use mz_sql_parser::ast::visit_mut::{self, VisitMut};
998+
use mz_sql_parser::ast::{AstInfo, Expr};
999+
1000+
struct StripNested;
1001+
impl<'a, T: AstInfo> VisitMut<'a, T> for StripNested {
1002+
fn visit_expr_mut(&mut self, e: &'a mut Expr<T>) {
1003+
visit_mut::visit_expr_mut(self, e);
1004+
if let Expr::Nested(inner) = e {
1005+
*e = (**inner).clone();
1006+
}
1007+
}
1008+
}
1009+
1010+
// The receivers of the postfix `.field`/`.*` and `[…]` operators must stay
1011+
// self-delimiting once `Expr::Nested` is stripped, or the trailing token
1012+
// re-binds. A bare-name `FieldAccess`/`WildcardAccess` receiver prints as a
1013+
// dotted name and collides with a qualified identifier (`(a).b` -> `a.b` is
1014+
// `Identifier([a, b])`); a `Cast` subscript receiver lets the type parser eat
1015+
// the `[…]` as an array suffix (`(a::int4)[1]` -> `a::int4[1]` is `a::int4[]`);
1016+
// and a nested `Subscript` receiver flattens (`(a[1])[2]` -> `a[1][2]` is one
1017+
// two-position subscript).
1018+
for sql in [
1019+
// dot receiver
1020+
"SELECT (a).b",
1021+
"SELECT (a).*",
1022+
"SELECT ((a).b).c",
1023+
"SELECT ((a).*).*",
1024+
"SELECT (a).b[1]",
1025+
// subscript receiver
1026+
"SELECT (a::int4)[1]",
1027+
"SELECT (CAST(a AS int4))[1]",
1028+
"SELECT (a[1])[2]",
1029+
"SELECT (a + b)[1]",
1030+
// these are parser-shaped and must remain stable (no spurious parens)
1031+
"SELECT (x).a.b",
1032+
"SELECT a.b[1]",
1033+
"SELECT a[1][2]",
1034+
"SELECT f(x)[1]",
1035+
"SELECT (a + b).c",
1036+
] {
1037+
let mut ast = mz_sql_parser::parser::parse_statements(sql)
1038+
.unwrap()
1039+
.remove(0)
1040+
.ast;
1041+
StripNested.visit_statement_mut(&mut ast);
1042+
let displayed = ast.to_ast_string_simple();
1043+
let mut reparsed = mz_sql_parser::parser::parse_statements(&displayed)
1044+
.unwrap()
1045+
.remove(0)
1046+
.ast;
1047+
StripNested.visit_statement_mut(&mut reparsed);
1048+
assert_eq!(
1049+
ast, reparsed,
1050+
"postfix-access receiver display did not round-trip after Nested was stripped: {sql:?} -> {displayed:?}"
1051+
);
1052+
}
1053+
}

0 commit comments

Comments
 (0)