Skip to content

Commit 88dfec7

Browse files
Samyak2 authored and ayman-sigma committed
GenericDialect: support colon operator for JsonAccess (apache#2124)
1 parent e39c55d commit 88dfec7

File tree

6 files changed

+173
-2
lines changed

6 files changed

+173
-2
lines changed

src/dialect/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
759759
Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
760760
Ok(p!(DoubleColon))
761761
}
762+
Token::Colon => match parser.peek_nth_token(1).token {
763+
// When colon is followed by a string or a number, it's usually in MAP syntax.
764+
Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
765+
// In other cases, it's used in semi-structured data traversal like in variant or JSON
766+
// string columns. See `JsonAccess`.
767+
_ => Ok(p!(Colon)),
768+
},
762769
Token::Arrow
763770
| Token::LongArrow
764771
| Token::HashArrow
@@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any {
812819
Precedence::Ampersand => 23,
813820
Precedence::Caret => 22,
814821
Precedence::Pipe => 21,
822+
Precedence::Colon => 21,
815823
Precedence::Between => 20,
816824
Precedence::Eq => 20,
817825
Precedence::Like => 19,
@@ -1274,6 +1282,8 @@ pub enum Precedence {
12741282
Caret,
12751283
/// Bitwise `OR` / pipe operator (`|`).
12761284
Pipe,
1285+
/// `:` operator for json/variant access.
1286+
Colon,
12771287
/// `BETWEEN` operator.
12781288
Between,
12791289
/// Equality operator (`=`).

src/dialect/mssql.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect {
160160
None
161161
}
162162
}
163+
164+
/// Dialect-specific precedence override for the token the parser is about to consume.
///
/// Returns `Some(Ok(self.prec_unknown()))` when the next token is `Token::Colon`, and `None`
/// for every other token (presumably so the caller falls back to the default precedence
/// rules; confirm against the `Dialect` trait).
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
165+
let token = parser.peek_token();
166+
match token.token {
167+
// Give `:` the lowest precedence so the parser does not turn it into a binary op.
168+
Token::Colon => Some(Ok(self.prec_unknown())),
169+
_ => None,
170+
}
171+
}
163172
}
164173

165174
impl MsSqlDialect {

src/dialect/postgresql.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
136136
| Token::ShiftRight
137137
| Token::ShiftLeft
138138
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
139+
// lowest prec to prevent it from turning into a binary op
140+
Token::Colon => Some(Ok(self.prec_unknown())),
139141
_ => None,
140142
}
141143
}
@@ -159,6 +161,7 @@ impl Dialect for PostgreSqlDialect {
159161
Precedence::Ampersand => PG_OTHER_PREC,
160162
Precedence::Caret => CARET_PREC,
161163
Precedence::Pipe => PG_OTHER_PREC,
164+
Precedence::Colon => PG_OTHER_PREC,
162165
Precedence::Between => BETWEEN_LIKE_PREC,
163166
Precedence::Eq => EQ_PREC,
164167
Precedence::Like => BETWEEN_LIKE_PREC,

src/parser/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3955,7 +3955,8 @@ impl<'a> Parser<'a> {
39553955
let lower_bound = if self.consume_token(&Token::Colon) {
39563956
None
39573957
} else {
3958-
Some(self.parse_expr()?)
3958+
// parse expr until we hit a colon (or any token with lower precedence)
3959+
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
39593960
};
39603961

39613962
// check for end
@@ -3983,7 +3984,8 @@ impl<'a> Parser<'a> {
39833984
stride: None,
39843985
});
39853986
} else {
3986-
Some(self.parse_expr()?)
3987+
// parse expr until we hit a colon (or any token with lower precedence)
3988+
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
39873989
};
39883990

39893991
// check for end

tests/sqlparser_common.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18093,3 +18093,148 @@ fn test_binary_kw_as_cast() {
1809318093
all_dialects_where(|d| d.supports_binary_kw_as_cast())
1809418094
.one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)");
1809518095
}
18096+
18097+
// Colon-based semi-structured data traversal (`a:b`), checked against the Generic, Snowflake
// and Databricks dialects. Each case asserts the `Expr::JsonAccess` tree found in the
// projection of the parsed SELECT.
#[test]
18098+
fn parse_semi_structured_data_traversal() {
18099+
let dialects = TestedDialects::new(vec![
18100+
Box::new(GenericDialect {}),
18101+
Box::new(SnowflakeDialect {}),
18102+
Box::new(DatabricksDialect {}),
18103+
]);
18104+
18105+
// most basic case: a single unquoted key after the colon
18106+
let sql = "SELECT a:b FROM t";
18107+
let select = dialects.verified_only_select(sql);
18108+
assert_eq!(
18109+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18110+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18111+
path: JsonPath {
18112+
path: vec![JsonPathElem::Dot {
18113+
key: "b".to_owned(),
18114+
quoted: false
18115+
}]
18116+
},
18117+
}),
18118+
select.projection[0]
18119+
);
18120+
18121+
// identifier can be quoted; the key is then recorded with `quoted: true`
18122+
let sql = r#"SELECT a:"my long object key name" FROM t"#;
18123+
let select = dialects.verified_only_select(sql);
18124+
assert_eq!(
18125+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18126+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18127+
path: JsonPath {
18128+
path: vec![JsonPathElem::Dot {
18129+
key: "my long object key name".to_owned(),
18130+
quoted: true
18131+
}]
18132+
},
18133+
}),
18134+
select.projection[0]
18135+
);
18136+
// a `::` cast may directly follow the traversal path
18137+
dialects.verified_stmt("SELECT a:b::INT FROM t");
18138+
18139+
// unquoted keywords are permitted in the object key
18140+
let sql = "SELECT a:select, a:from FROM t";
18141+
let select = dialects.verified_only_select(sql);
18142+
assert_eq!(
18143+
vec![
18144+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18145+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18146+
path: JsonPath {
18147+
path: vec![JsonPathElem::Dot {
18148+
key: "select".to_owned(),
18149+
quoted: false
18150+
}]
18151+
},
18152+
}),
18153+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18154+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18155+
path: JsonPath {
18156+
path: vec![JsonPathElem::Dot {
18157+
key: "from".to_owned(),
18158+
quoted: false
18159+
}]
18160+
},
18161+
})
18162+
],
18163+
select.projection
18164+
);
18165+
18166+
// multiple levels can be traversed, mixing unquoted and quoted keys
18167+
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
18168+
let sql = r#"SELECT a:foo."bar".baz"#;
18169+
let select = dialects.verified_only_select(sql);
18170+
assert_eq!(
18171+
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
18172+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18173+
path: JsonPath {
18174+
path: vec![
18175+
JsonPathElem::Dot {
18176+
key: "foo".to_owned(),
18177+
quoted: false,
18178+
},
18179+
JsonPathElem::Dot {
18180+
key: "bar".to_owned(),
18181+
quoted: true,
18182+
},
18183+
JsonPathElem::Dot {
18184+
key: "baz".to_owned(),
18185+
quoted: false,
18186+
}
18187+
]
18188+
},
18189+
})],
18190+
select.projection
18191+
);
18192+
18193+
// dot and bracket notation can be mixed (starting with : case)
18194+
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
18195+
let sql = r#"SELECT a:foo[0].bar"#;
18196+
let select = dialects.verified_only_select(sql);
18197+
assert_eq!(
18198+
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
18199+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18200+
path: JsonPath {
18201+
path: vec![
18202+
JsonPathElem::Dot {
18203+
key: "foo".to_owned(),
18204+
quoted: false,
18205+
},
18206+
JsonPathElem::Bracket {
18207+
key: Expr::value(number("0")),
18208+
},
18209+
JsonPathElem::Dot {
18210+
key: "bar".to_owned(),
18211+
quoted: false,
18212+
}
18213+
]
18214+
},
18215+
})],
18216+
select.projection
18217+
);
18218+
}
18219+
18220+
// Array subscript and slice syntax, run through `verified_stmt` on every dialect except
// MsSql, Snowflake, SQLite and Redshift.
#[test]
18221+
fn parse_array_subscript() {
18222+
let dialects = all_dialects_except(|d| {
18223+
d.is::<MsSqlDialect>()
18224+
|| d.is::<SnowflakeDialect>()
18225+
|| d.is::<SQLiteDialect>()
18226+
|| d.is::<RedshiftSqlDialect>()
18227+
});
18228+
// single index, empty slice, `lower:upper` and `lower:upper:stride` with literal bounds
18229+
dialects.verified_stmt("SELECT arr[1]");
18230+
dialects.verified_stmt("SELECT arr[:]");
18231+
dialects.verified_stmt("SELECT arr[1:2]");
18232+
dialects.verified_stmt("SELECT arr[1:2:4]");
// slice bounds that are function calls or binary expressions
18233+
dialects.verified_stmt("SELECT arr[1:array_length(arr)]");
18234+
dialects.verified_stmt("SELECT arr[array_length(arr) - 1:array_length(arr)]");
18235+
dialects
18236+
.verified_stmt("SELECT arr[array_length(arr) - 2:array_length(arr) - 1:array_length(arr)]");
18237+
// chained subscripts
18238+
dialects.verified_stmt("SELECT arr[1][2]");
18239+
dialects.verified_stmt("SELECT arr[:][:]");
18240+
}

tests/sqlparser_snowflake.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,6 +1265,8 @@ fn parse_lateral_flatten() {
12651265
// https://docs.snowflake.com/en/user-guide/querying-semistructured
12661266
#[test]
12671267
fn parse_semi_structured_data_traversal() {
1268+
// see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
1269+
// cases. This test only has Snowflake-specific syntax like array access.
12681270
// most basic case
12691271
let sql = "SELECT a:b FROM t";
12701272
let select = snowflake().verified_only_select(sql);

0 commit comments

Comments
 (0)