Skip to content

Commit 5d27860

Browse files
authored
feat: parse JsonAccess as a binary operator, add Operator::Colon (#20628)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> Needed in datafusion-contrib/datafusion-variant#26 ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> - `sqlparser-rs` currently exposes the colon operator (`:`) as a special `JsonAccess` expression. So it fails in datafusion's parsing before an `ExprPlanner` is even invoked. - Add `Operator::Colon`. Currently it's not used/implemented in datafusion. ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> - Fixes the above problem by converting `JsonAccess` to a normal binary expr, on which the `ExprPlanner` is invoked and custom parsing can be done. ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> Added tests. Also did a prototype of a custom `ExprPlanner` in datafusion-variant using this to convert colon operator to `variant_get` function - datafusion-contrib/datafusion-variant#31 ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> Add `Operator::Colon` ## Alternatives considered I have arrived at this solution iteratively to solve datafusion-contrib/datafusion-variant#26. Some alternatives I considered (but NOT IMPLEMENTED): - Add a new `Expr::JsonAccess` instead of re-using `BinaryExpr`. Alongside this, we could also introduce datafusion's version of a `JsonPath` struct. `Expr::JsonAccess` could also contain the optional cast that usually comes after it (like in `col_a:field_b::int`). The cast might be needed for performance optimizations. This is a much bigger change, so I have not done it. If this makes sense to do at this time, please let me know and I will do those changes.
1 parent 0f093f4 commit 5d27860

File tree

8 files changed

+85
-8
lines changed

8 files changed

+85
-8
lines changed

datafusion/expr-common/src/operator.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ pub enum Operator {
140140
///
141141
/// Not implemented in DataFusion yet.
142142
QuestionPipe,
143+
/// Colon operator, like `:`
144+
///
145+
/// Not implemented in DataFusion yet.
146+
Colon,
143147
}
144148

145149
impl Operator {
@@ -188,7 +192,8 @@ impl Operator {
188192
| Operator::AtQuestion
189193
| Operator::Question
190194
| Operator::QuestionAnd
191-
| Operator::QuestionPipe => None,
195+
| Operator::QuestionPipe
196+
| Operator::Colon => None,
192197
}
193198
}
194199

@@ -283,7 +288,8 @@ impl Operator {
283288
| Operator::AtQuestion
284289
| Operator::Question
285290
| Operator::QuestionAnd
286-
| Operator::QuestionPipe => None,
291+
| Operator::QuestionPipe
292+
| Operator::Colon => None,
287293
}
288294
}
289295

@@ -323,7 +329,8 @@ impl Operator {
323329
| Operator::AtQuestion
324330
| Operator::Question
325331
| Operator::QuestionAnd
326-
| Operator::QuestionPipe => 30,
332+
| Operator::QuestionPipe
333+
| Operator::Colon => 30,
327334
Operator::Plus | Operator::Minus => 40,
328335
Operator::Multiply | Operator::Divide | Operator::Modulo => 45,
329336
}
@@ -369,7 +376,8 @@ impl Operator {
369376
| Operator::AtQuestion
370377
| Operator::Question
371378
| Operator::QuestionAnd
372-
| Operator::QuestionPipe => true,
379+
| Operator::QuestionPipe
380+
| Operator::Colon => true,
373381

374382
// E.g. `TRUE OR NULL` is `TRUE`
375383
Operator::Or
@@ -429,6 +437,7 @@ impl fmt::Display for Operator {
429437
Operator::Question => "?",
430438
Operator::QuestionAnd => "?&",
431439
Operator::QuestionPipe => "?|",
440+
Operator::Colon => ":",
432441
};
433442
write!(f, "{display}")
434443
}

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,9 @@ impl<'a> BinaryTypeCoercer<'a> {
324324
)
325325
}
326326
},
327+
Colon => {
328+
Ok(Signature { lhs: lhs.clone(), rhs: rhs.clone(), ret: lhs.clone() })
329+
},
327330
IntegerDivide | Arrow | LongArrow | HashArrow | HashLongArrow
328331
| HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe => {
329332
not_impl_err!("Operator {} is not yet supported", self.op)

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ impl BinaryExpr {
715715
StringConcat => concat_elements(&left, &right),
716716
AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt
717717
| HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe
718-
| IntegerDivide => {
718+
| IntegerDivide | Colon => {
719719
not_impl_err!(
720720
"Binary operator '{:?}' is not supported in the physical expr",
721721
self.op

datafusion/sql/src/expr/binary_op.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use sqlparser::ast::BinaryOperator;
2222

2323
impl<S: ContextProvider> SqlToRel<'_, S> {
2424
pub(crate) fn parse_sql_binary_op(&self, op: &BinaryOperator) -> Result<Operator> {
25-
match *op {
25+
match op {
2626
BinaryOperator::Gt => Ok(Operator::Gt),
2727
BinaryOperator::GtEq => Ok(Operator::GtEq),
2828
BinaryOperator::Lt => Ok(Operator::Lt),
@@ -68,6 +68,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
6868
BinaryOperator::Question => Ok(Operator::Question),
6969
BinaryOperator::QuestionAnd => Ok(Operator::QuestionAnd),
7070
BinaryOperator::QuestionPipe => Ok(Operator::QuestionPipe),
71+
BinaryOperator::Custom(s) if s == ":" => Ok(Operator::Colon),
7172
_ => not_impl_err!("Unsupported binary operator: {:?}", op),
7273
}
7374
}

datafusion/sql/src/expr/mod.rs

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use datafusion_expr::planner::{
2222
use sqlparser::ast::{
2323
AccessExpr, BinaryOperator, CastFormat, CastKind, CeilFloorKind,
2424
DataType as SQLDataType, DateTimeField, DictionaryField, Expr as SQLExpr,
25-
ExprWithAlias as SQLExprWithAlias, MapEntry, StructField, Subscript, TrimWhereField,
26-
TypedString, Value, ValueWithSpan,
25+
ExprWithAlias as SQLExprWithAlias, JsonPath, MapEntry, StructField, Subscript,
26+
TrimWhereField, TypedString, Value, ValueWithSpan,
2727
};
2828

2929
use datafusion_common::{
@@ -651,10 +651,36 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
651651
options: Box::new(WildcardOptions::default()),
652652
}),
653653
SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values),
654+
SQLExpr::JsonAccess { value, path } => {
655+
self.parse_json_access(schema, planner_context, value, &path)
656+
}
654657
_ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
655658
}
656659
}
657660

661+
fn parse_json_access(
662+
&self,
663+
schema: &DFSchema,
664+
planner_context: &mut PlannerContext,
665+
value: Box<SQLExpr>,
666+
path: &JsonPath,
667+
) -> Result<Expr> {
668+
let json_path = path.to_string();
669+
let json_path = if let Some(json_path) = json_path.strip_prefix(":") {
670+
// sqlparser's JsonPath display adds an extra `:` at the beginning.
671+
json_path.to_owned()
672+
} else {
673+
json_path
674+
};
675+
self.build_logical_expr(
676+
BinaryOperator::Custom(":".to_owned()),
677+
self.sql_to_expr(*value, schema, planner_context)?,
678+
// pass json path as a string literal, let the impl parse it when needed.
679+
Expr::Literal(ScalarValue::Utf8(Some(json_path)), None),
680+
schema,
681+
)
682+
}
683+
658684
/// Parses a struct(..) expression and plans it creation
659685
fn parse_struct(
660686
&self,

datafusion/sql/src/unparser/expr.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,7 @@ impl Unparser<'_> {
10941094
Operator::Question => Ok(BinaryOperator::Question),
10951095
Operator::QuestionAnd => Ok(BinaryOperator::QuestionAnd),
10961096
Operator::QuestionPipe => Ok(BinaryOperator::QuestionPipe),
1097+
Operator::Colon => Ok(BinaryOperator::Custom(":".to_owned())),
10971098
}
10981099
}
10991100

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2821,3 +2821,39 @@ fn test_struct_expr3() {
28212821
@r#"SELECT test.c1."metadata".product."name" FROM (SELECT {"metadata": {product: {"name": 'Product Name'}}} AS c1) AS test"#
28222822
);
28232823
}
2824+
2825+
#[test]
2826+
fn test_json_access_1() {
2827+
let statement = generate_round_trip_statement(
2828+
GenericDialect {},
2829+
r#"SELECT j1_string:field FROM j1"#,
2830+
);
2831+
assert_snapshot!(
2832+
statement,
2833+
@r#"SELECT (j1.j1_string : 'field') FROM j1"#
2834+
);
2835+
}
2836+
2837+
#[test]
2838+
fn test_json_access_2() {
2839+
let statement = generate_round_trip_statement(
2840+
GenericDialect {},
2841+
r#"SELECT j1_string:field[0] FROM j1"#,
2842+
);
2843+
assert_snapshot!(
2844+
statement,
2845+
@r#"SELECT (j1.j1_string : 'field[0]') FROM j1"#
2846+
);
2847+
}
2848+
2849+
#[test]
2850+
fn test_json_access_3() {
2851+
let statement = generate_round_trip_statement(
2852+
GenericDialect {},
2853+
r#"SELECT j1_string:field.inner1['inner2'] FROM j1"#,
2854+
);
2855+
assert_snapshot!(
2856+
statement,
2857+
@r#"SELECT (j1.j1_string : 'field.inner1[''inner2'']') FROM j1"#
2858+
);
2859+
}

datafusion/substrait/src/logical_plan/producer/expr/scalar_function.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,5 +344,6 @@ pub fn operator_to_name(op: Operator) -> &'static str {
344344
Operator::BitwiseXor => "bitwise_xor",
345345
Operator::BitwiseShiftRight => "bitwise_shift_right",
346346
Operator::BitwiseShiftLeft => "bitwise_shift_left",
347+
Operator::Colon => "colon",
347348
}
348349
}

0 commit comments

Comments
 (0)