Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ pub trait Dialect: Send + Sync {
DateFieldExtractStyle::DatePart
}

/// The style to use when unparsing DISTINCT FROM style expressions
fn distinct_from_style(&self) -> DistinctFromStyle {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this new default changes existing behavior for downstream and custom dialects. Before this PR, a `Dialect` that did not override this method could still unparse `IS DISTINCT FROM` and `IS NOT DISTINCT FROM` using the standard full-text syntax.

With the default now set to Unsupported, those same dialects can start returning not_impl_err! for expressions that previously worked. The MySQL fix only needs a MySQL-specific override, so I think the trait default should preserve the old behavior. For example, this could default to DistinctFromStyle::FullText and only dialects that need a different spelling would override it.

@zyuiop zyuiop Jun 18, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I have therefore removed the Unsupported enum variant

DistinctFromStyle::FullText
}

/// The character length extraction style to use: `CharacterLengthStyle`
fn character_length_style(&self) -> CharacterLengthStyle {
CharacterLengthStyle::CharacterLength
Expand Down Expand Up @@ -333,6 +338,15 @@ pub enum CharacterLengthStyle {
CharacterLength,
}

/// `DistinctFromStyle` to use for unparsing `IsDistinctFrom` and `IsNotDistinctFrom` operators
///
/// A dialect picks the style through its `distinct_from_style` method; the
/// expression unparser matches on the returned variant to choose the SQL spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DistinctFromStyle {
    /// DBMS supports `IS (NOT) DISTINCT FROM`; both operators are emitted
    /// with that spelling.
    FullText,
    /// DBMS supports equivalent operations via `<=>` and `NOT <=>`:
    /// `IS NOT DISTINCT FROM` is emitted as `(a <=> b)` and
    /// `IS DISTINCT FROM` as `(NOT (a <=> b))`.
    Spaceship,
}

pub struct DefaultDialect {}

impl Dialect for DefaultDialect {
Expand Down Expand Up @@ -385,6 +399,10 @@ impl Dialect for PostgreSqlDialect {
ast::DataType::SmallInt(None)
}

/// Unparse `IsDistinctFrom` / `IsNotDistinctFrom` with the
/// `IS (NOT) DISTINCT FROM` spelling.
fn distinct_from_style(&self) -> DistinctFromStyle {
DistinctFromStyle::FullText
}

fn scalar_function_to_sql_overrides(
&self,
unparser: &Unparser,
Expand Down Expand Up @@ -529,6 +547,10 @@ impl Dialect for DuckDBDialect {

Ok(None)
}

/// Unparse `IsDistinctFrom` / `IsNotDistinctFrom` with the
/// `IS (NOT) DISTINCT FROM` spelling.
fn distinct_from_style(&self) -> DistinctFromStyle {
DistinctFromStyle::FullText
}
}

pub struct MySqlDialect {}
Expand Down Expand Up @@ -562,6 +584,10 @@ impl Dialect for MySqlDialect {
DateFieldExtractStyle::Extract
}

/// Unparse `IsDistinctFrom` / `IsNotDistinctFrom` with the `<=>` operator
/// instead of `IS (NOT) DISTINCT FROM`: `IS NOT DISTINCT FROM` becomes
/// `(a <=> b)` and `IS DISTINCT FROM` becomes `(NOT (a <=> b))`.
fn distinct_from_style(&self) -> DistinctFromStyle {
DistinctFromStyle::Spaceship
}

fn int64_cast_dtype(&self) -> ast::DataType {
ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
}
Expand Down Expand Up @@ -619,6 +645,10 @@ impl Dialect for SqliteDialect {
CharacterLengthStyle::Length
}

/// Unparse `IsDistinctFrom` / `IsNotDistinctFrom` with the
/// `IS (NOT) DISTINCT FROM` spelling.
fn distinct_from_style(&self) -> DistinctFromStyle {
DistinctFromStyle::FullText
}

fn supports_column_alias_in_table_alias(&self) -> bool {
false
}
Expand Down
47 changes: 37 additions & 10 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use std::sync::Arc;
use std::vec;

use super::Unparser;
use super::dialect::IntervalStyle;
use super::dialect::{DistinctFromStyle, IntervalStyle};
use arrow::array::{
ArrayRef, Date32Array, Date64Array, PrimitiveArray,
types::{
Expand Down Expand Up @@ -156,10 +156,23 @@ impl Unparser<'_> {
let l = self.expr_to_sql_inner(left.as_ref())?;
let r = self.expr_to_sql_inner(right.as_ref())?;

Ok(ast::Expr::Nested(Box::new(ast::Expr::IsDistinctFrom(
Box::new(l),
Box::new(r),
))))
match self.dialect.distinct_from_style() {
DistinctFromStyle::FullText => Ok(ast::Expr::Nested(Box::new(
ast::Expr::IsDistinctFrom(Box::new(l), Box::new(r)),
))),
DistinctFromStyle::Spaceship => {
Ok(ast::Expr::Nested(Box::new(ast::Expr::UnaryOp {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the MySQL spelling for IS DISTINCT FROM needs to force the <=> comparison to be parsed as the operand of NOT.

As written, `NOT <=>` can be ambiguous under MySQL's `HIGH_NOT_PRECEDENCE` SQL mode, where `NOT a <=> b` may be parsed as `(NOT a) <=> b`. That is not equivalent to `a IS DISTINCT FROM b`.

Could this build the unary expression over a nested spaceship comparison instead? For example: NOT (c1 <=> true), or with outer parentheses as (NOT (c1 <=> true)).

@zyuiop zyuiop Jun 18, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I have added Expr::Nested inside the UnaryOp

op: UnaryOperator::Not,
expr: Box::new(ast::Expr::Nested(Box::new(
ast::Expr::BinaryOp {
left: Box::new(l),
right: Box::new(r),
op: BinaryOperator::Spaceship,
},
))),
})))
}
}
}
Expr::BinaryExpr(BinaryExpr {
left,
Expand All @@ -169,10 +182,18 @@ impl Unparser<'_> {
let l = self.expr_to_sql_inner(left.as_ref())?;
let r = self.expr_to_sql_inner(right.as_ref())?;

Ok(ast::Expr::Nested(Box::new(ast::Expr::IsNotDistinctFrom(
Box::new(l),
Box::new(r),
))))
match self.dialect.distinct_from_style() {
DistinctFromStyle::FullText => Ok(ast::Expr::Nested(Box::new(
ast::Expr::IsNotDistinctFrom(Box::new(l), Box::new(r)),
))),
DistinctFromStyle::Spaceship => {
Ok(ast::Expr::Nested(Box::new(ast::Expr::BinaryOp {
left: Box::new(l),
right: Box::new(r),
op: BinaryOperator::Spaceship,
})))
}
}
}
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
let l = self.expr_to_sql_inner(left.as_ref())?;
Expand Down Expand Up @@ -1908,7 +1929,7 @@ mod tests {
use std::ops::{Add, Sub};
use std::{sync::Arc, vec};

use crate::unparser::dialect::SqliteDialect;
use crate::unparser::dialect::{MySqlDialect, SqliteDialect};
use arrow::array::{LargeListArray, LargeListViewArray, ListArray, ListViewArray};
use arrow::datatypes::{DataType::Int8, Field, Int32Type, Schema, TimeUnit};
use ast::ObjectName;
Expand Down Expand Up @@ -3714,6 +3735,8 @@ mod tests {

#[test]
fn test_is_distinct_from() {
let mysql_unparser = Unparser::new(&MySqlDialect {});

let expr = Expr::BinaryExpr(BinaryExpr::new(
Box::new(col("c1")),
Operator::IsDistinctFrom,
Expand All @@ -3722,6 +3745,8 @@ mod tests {

let sql = expr_to_sql(&expr).unwrap().to_string();
assert_eq!(sql, "(c1 IS DISTINCT FROM true)");
let sql = mysql_unparser.expr_to_sql(&expr).unwrap().to_string();
assert_eq!(sql, "(NOT (`c1` <=> true))");

let expr = Expr::BinaryExpr(BinaryExpr::new(
Box::new(col("c1")),
Expand All @@ -3731,6 +3756,8 @@ mod tests {

let sql = expr_to_sql(&expr).unwrap().to_string();
assert_eq!(sql, "(c1 IS NOT DISTINCT FROM true)");
let sql = mysql_unparser.expr_to_sql(&expr).unwrap().to_string();
assert_eq!(sql, "(`c1` <=> true)");
}

#[test]
Expand Down