Skip to content

Commit 47b6aac

Browse files
authored
add support for databricks JSON accessors (#2272)
1 parent d38dd78 commit 47b6aac

File tree

4 files changed

+66
-3
lines changed

4 files changed

+66
-3
lines changed

src/ast/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,14 @@ pub enum JsonPathElem {
651651
/// The expression used as the bracket key (string, numeric, or wildcard `*` expression).
652652
key: Expr,
653653
},
654+
/// Access an object field using colon bracket notation
655+
/// e.g. `obj:['foo']`
656+
///
657+
/// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>
658+
ColonBracket {
659+
/// The expression used as the bracket key (string, numeric, or wildcard `*` expression).
660+
key: Expr,
661+
},
654662
}
655663

656664
/// A JSON path.
@@ -685,6 +693,9 @@ impl fmt::Display for JsonPath {
685693
JsonPathElem::Bracket { key } => {
686694
write!(f, "[{key}]")?;
687695
}
696+
JsonPathElem::ColonBracket { key } => {
697+
write!(f, ":[{key}]")?;
698+
}
688699
}
689700
}
690701
Ok(())

src/ast/spans.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1799,6 +1799,7 @@ impl Spanned for JsonPathElem {
17991799
match self {
18001800
JsonPathElem::Dot { .. } => Span::empty(),
18011801
JsonPathElem::Bracket { key } => key.span(),
1802+
JsonPathElem::ColonBracket { key } => key.span(),
18021803
}
18031804
}
18041805
}

src/parser/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4195,8 +4195,9 @@ impl<'a> Parser<'a> {
41954195
match token.token {
41964196
Token::Word(Word {
41974197
value,
4198-
// path segments in SF dot notation can be unquoted or double-quoted
4199-
quote_style: quote_style @ (Some('"') | None),
4198+
// path segments in Snowflake dot notation can be unquoted or double-quoted;
4199+
// Databricks also supports backtick-quoted identifiers
4200+
quote_style: quote_style @ (Some('"') | Some('`') | None),
42004201
// some experimentation suggests that snowflake permits
42014202
// any keyword here unquoted.
42024203
keyword: _,
@@ -4226,14 +4227,20 @@ impl<'a> Parser<'a> {
42264227
let mut path = Vec::new();
42274228
loop {
42284229
match self.next_token().token {
4230+
Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => {
4231+
self.next_token();
4232+
let key = self.parse_wildcard_expr()?;
4233+
self.expect_token(&Token::RBracket)?;
4234+
path.push(JsonPathElem::ColonBracket { key });
4235+
}
42294236
Token::Colon if path.is_empty() => {
42304237
path.push(self.parse_json_path_object_key()?);
42314238
}
42324239
Token::Period if !path.is_empty() => {
42334240
path.push(self.parse_json_path_object_key()?);
42344241
}
42354242
Token::LBracket => {
4236-
let key = self.parse_expr()?;
4243+
let key = self.parse_wildcard_expr()?;
42374244
self.expect_token(&Token::RBracket)?;
42384245

42394246
path.push(JsonPathElem::Bracket { key });

tests/sqlparser_databricks.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,3 +600,47 @@ fn parse_databricks_struct_type() {
600600
_ => unreachable!(),
601601
}
602602
}
603+
604+
#[test]
605+
fn parse_databricks_json_accessor() {
606+
// Basic colon accessor — unquoted field names are case-insensitive
607+
databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data");
608+
609+
// Unquoted field access is case-insensitive; bracket notation is case-sensitive.
610+
databricks().verified_only_select(
611+
"SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data",
612+
);
613+
614+
// Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output.
615+
databricks().one_statement_parses_to(
616+
"SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data",
617+
r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#,
618+
);
619+
620+
// Dot notation
621+
databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data");
622+
623+
// String-key bracket notation after a dot segment
624+
databricks()
625+
.verified_only_select("SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data");
626+
627+
// Integer-index bracket notation
628+
databricks()
629+
.verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data");
630+
631+
// Wildcard [*] — including chained and mixed positions
632+
databricks().verified_only_select(
633+
"SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \
634+
raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \
635+
raw:store.basket[0][2].b AS subfield FROM store_data",
636+
);
637+
638+
// Dot access following a wildcard bracket
639+
databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM store_data");
640+
641+
// Double-colon cast — type keyword normalises to upper case
642+
databricks().one_statement_parses_to(
643+
"SELECT raw:store.bicycle.price::double FROM store_data",
644+
"SELECT raw:store.bicycle.price::DOUBLE FROM store_data",
645+
);
646+
}

0 commit comments

Comments
 (0)