Skip to content

Commit 2ac3fff

Browse files
committed
fix: inherit field names from left projection in set expressions when missing
Otherwise they might end up having the same column names, which is prohibited. This is only the case for literals, other expression types should have default unique names.
1 parent 37b9a46 commit 2ac3fff

File tree

3 files changed

+249
-3
lines changed

3 files changed

+249
-3
lines changed

datafusion/sql/src/set_expr.rs

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
1919
use datafusion_common::{
20-
DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
20+
DFSchemaRef, DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
2121
};
2222
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
23-
use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier, Spanned};
23+
use sqlparser::ast::{
24+
Expr as SQLExpr, Ident, SelectItem, SetExpr, SetOperator, SetQuantifier, Spanned,
25+
};
2426

2527
impl<S: ContextProvider> SqlToRel<'_, S> {
2628
#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
@@ -42,7 +44,27 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
4244
let left_span = Span::try_from_sqlparser_span(left.span());
4345
let right_span = Span::try_from_sqlparser_span(right.span());
4446
let left_plan = self.set_expr_to_plan(*left, planner_context);
45-
let right_plan = self.set_expr_to_plan(*right, planner_context);
47+
48+
// For non-*ByName operations, add missing aliases to right side using left schema's
49+
// column names. This allows queries like
50+
// `SELECT 1 c1, 0 c2, 0 c3 UNION ALL SELECT 2, 0, 0`
51+
// where the right side has duplicate literal values.
52+
// We only do this if the left side succeeded.
53+
let right = if let Ok(plan) = &left_plan
54+
&& matches!(
55+
set_quantifier,
56+
SetQuantifier::All
57+
| SetQuantifier::Distinct
58+
| SetQuantifier::None
59+
) {
60+
alias_set_expr(*right, plan.schema())
61+
} else {
62+
*right
63+
};
64+
65+
let right_plan = self.set_expr_to_plan(right, planner_context);
66+
67+
// Handle errors from both sides, collecting them if both failed
4668
let (left_plan, right_plan) = match (left_plan, right_plan) {
4769
(Ok(left_plan), Ok(right_plan)) => (left_plan, right_plan),
4870
(Err(left_err), Err(right_err)) => {
@@ -160,3 +182,75 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
160182
}
161183
}
162184
}
185+
186+
// Adds aliases to SELECT items in a SetExpr using the provided schema.
187+
// This ensures that unnamed expressions on the right side of a UNION/INTERSECT/EXCEPT
188+
// get aliased with the column names from the left side, allowing queries like
189+
// `SELECT 1 AS a, 0 AS b, 0 AS c UNION ALL SELECT 2, 0, 0` to work correctly.
190+
fn alias_set_expr(set_expr: SetExpr, schema: &DFSchemaRef) -> SetExpr {
191+
match set_expr {
192+
SetExpr::Select(mut select) => {
193+
alias_select_items(&mut select.projection, schema);
194+
SetExpr::Select(select)
195+
}
196+
SetExpr::SetOperation {
197+
op,
198+
left,
199+
right,
200+
set_quantifier,
201+
} => {
202+
// For nested set operations, only alias the leftmost branch
203+
// since that's what determines the output column names
204+
SetExpr::SetOperation {
205+
op,
206+
left: Box::new(alias_set_expr(*left, schema)),
207+
right,
208+
set_quantifier,
209+
}
210+
}
211+
SetExpr::Query(mut query) => {
212+
// Handle parenthesized queries like (SELECT ... UNION ALL SELECT ...)
213+
query.body = Box::new(alias_set_expr(*query.body, schema));
214+
SetExpr::Query(query)
215+
}
216+
// For other cases (Values, etc.), return as-is
217+
other => other,
218+
}
219+
}
220+
221+
// Adds aliases to literal value expressions where missing, based on the input schema, as these are
222+
// the ones that can cause duplicate name issues (e.g. `SELECT 0, 0` has two columns named `Int64(0)`).
223+
// Other expressions typically have unique names.
224+
fn alias_select_items(items: &mut [SelectItem], schema: &DFSchemaRef) {
225+
let mut col_idx = 0;
226+
for item in items.iter_mut() {
227+
match item {
228+
SelectItem::UnnamedExpr(expr) if is_literal_value(expr) => {
229+
if let Some(field) = schema.fields().get(col_idx) {
230+
*item = SelectItem::ExprWithAlias {
231+
expr: expr.clone(),
232+
alias: Ident::new(field.name()),
233+
};
234+
}
235+
col_idx += 1;
236+
}
237+
SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
238+
col_idx += 1;
239+
}
240+
SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => {
241+
// Wildcards expand to multiple columns - skip position tracking
242+
}
243+
}
244+
}
245+
}
246+
247+
/// Returns true if the expression is a literal value that could cause duplicate names.
248+
fn is_literal_value(expr: &SQLExpr) -> bool {
249+
matches!(
250+
expr,
251+
SQLExpr::Value(_)
252+
| SQLExpr::UnaryOp { .. }
253+
| SQLExpr::TypedString { .. }
254+
| SQLExpr::Interval(_)
255+
)
256+
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,6 +2655,123 @@ fn union_all_by_name_same_column_names() {
26552655
);
26562656
}
26572657

2658+
#[test]
2659+
fn union_all_with_duplicate_literals() {
2660+
// Test that UNION ALL works with duplicate literal values in the right side
2661+
// The right side's expressions should inherit aliases from the left side's schema
2662+
let sql = "SELECT 1 as one, 0 as two, 0 as three UNION ALL SELECT 2, 0, 0";
2663+
let plan = logical_plan(sql).unwrap();
2664+
assert_snapshot!(
2665+
plan,
2666+
@r"
2667+
Union
2668+
Projection: Int64(1) AS one, Int64(0) AS two, Int64(0) AS three
2669+
EmptyRelation: rows=1
2670+
Projection: Int64(2) AS one, Int64(0) AS two, Int64(0) AS three
2671+
EmptyRelation: rows=1
2672+
"
2673+
);
2674+
}
2675+
2676+
#[test]
2677+
fn union_with_duplicate_literals() {
2678+
// Test that UNION DISTINCT also works with duplicate literals
2679+
let sql = "SELECT 1 as a, 2 as b UNION SELECT 3, 3";
2680+
let plan = logical_plan(sql).unwrap();
2681+
assert_snapshot!(
2682+
plan,
2683+
@r"
2684+
Distinct:
2685+
Union
2686+
Projection: Int64(1) AS a, Int64(2) AS b
2687+
EmptyRelation: rows=1
2688+
Projection: Int64(3) AS a, Int64(3) AS b
2689+
EmptyRelation: rows=1
2690+
"
2691+
);
2692+
}
2693+
2694+
#[test]
2695+
fn intersect_with_duplicate_literals() {
2696+
// Test that INTERSECT works with duplicate literals
2697+
let sql = "SELECT 1 as a, 0 as b, 0 as c INTERSECT SELECT 1, 0, 0";
2698+
let plan = logical_plan(sql).unwrap();
2699+
assert_snapshot!(
2700+
plan,
2701+
@r"
2702+
LeftSemi Join: left.a = right.a, left.b = right.b, left.c = right.c
2703+
Distinct:
2704+
SubqueryAlias: left
2705+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2706+
EmptyRelation: rows=1
2707+
SubqueryAlias: right
2708+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2709+
EmptyRelation: rows=1
2710+
"
2711+
);
2712+
}
2713+
2714+
#[test]
2715+
fn except_with_duplicate_literals() {
2716+
// Test that EXCEPT works with duplicate literals
2717+
let sql = "SELECT 1 as a, 0 as b, 0 as c EXCEPT SELECT 2, 0, 0";
2718+
let plan = logical_plan(sql).unwrap();
2719+
assert_snapshot!(
2720+
plan,
2721+
@r"
2722+
LeftAnti Join: left.a = right.a, left.b = right.b, left.c = right.c
2723+
Distinct:
2724+
SubqueryAlias: left
2725+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2726+
EmptyRelation: rows=1
2727+
SubqueryAlias: right
2728+
Projection: Int64(2) AS a, Int64(0) AS b, Int64(0) AS c
2729+
EmptyRelation: rows=1
2730+
"
2731+
);
2732+
}
2733+
2734+
#[test]
2735+
fn nested_union_with_duplicate_literals() {
2736+
// Test that nested UNIONs work correctly with duplicate literals (left-associative)
2737+
let sql = "SELECT 'a' as x, 1 as y UNION ALL SELECT 'b', 1 UNION ALL SELECT 'c', 1";
2738+
let plan = logical_plan(sql).unwrap();
2739+
assert_snapshot!(
2740+
plan,
2741+
@r#"
2742+
Union
2743+
Union
2744+
Projection: Utf8("a") AS x, Int64(1) AS y
2745+
EmptyRelation: rows=1
2746+
Projection: Utf8("b") AS x, Int64(1) AS y
2747+
EmptyRelation: rows=1
2748+
Projection: Utf8("c") AS x, Int64(1) AS y
2749+
EmptyRelation: rows=1
2750+
"#
2751+
);
2752+
}
2753+
2754+
#[test]
2755+
fn right_nested_union_with_duplicate_literals() {
2756+
// Test that right-nested UNIONs work correctly with duplicate literals
2757+
// Here the right side is a parenthesized set operation
2758+
let sql = "SELECT 1 as a, 0 as b UNION ALL (SELECT 2, 0 UNION ALL SELECT 3, 0)";
2759+
let plan = logical_plan(sql).unwrap();
2760+
assert_snapshot!(
2761+
plan,
2762+
@r"
2763+
Union
2764+
Projection: Int64(1) AS a, Int64(0) AS b
2765+
EmptyRelation: rows=1
2766+
Union
2767+
Projection: Int64(2) AS a, Int64(0) AS b
2768+
EmptyRelation: rows=1
2769+
Projection: Int64(3) AS a, Int64(0) AS b
2770+
EmptyRelation: rows=1
2771+
"
2772+
);
2773+
}
2774+
26582775
#[test]
26592776
fn empty_over() {
26602777
let sql = "SELECT order_id, MAX(order_id) OVER () from orders";

datafusion/sqllogictest/test_files/union.slt

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -976,3 +976,38 @@ query TITT
976976
END
977977
----
978978
ns3 4 t3 m
979+
980+
# Test UNION ALL with duplicate literal values on the right side
981+
# The right side has duplicate 0 literals which should inherit column names from the left
982+
query III rowsort
983+
SELECT 1 as a, 0 as b, 0 as c
984+
UNION ALL
985+
SELECT 2, 0, 0
986+
----
987+
1 0 0
988+
2 0 0
989+
990+
# Test nested UNION ALL with duplicate literals
991+
query II rowsort
992+
SELECT 1 as a, 0 as b
993+
UNION ALL
994+
SELECT 2, 0
995+
UNION ALL
996+
SELECT 3, 0
997+
----
998+
1 0
999+
2 0
1000+
3 0
1001+
1002+
# Test mixed set operations with nested EXCEPT and INTERSECT
1003+
# Only the first SELECT has named columns, the rest inherit the schema
1004+
query II rowsort
1005+
SELECT 1 as a, 0 as b
1006+
UNION ALL
1007+
(SELECT 2, 0 EXCEPT SELECT 3, 0)
1008+
UNION ALL
1009+
(SELECT 4, 0 INTERSECT SELECT 4, 0)
1010+
----
1011+
1 0
1012+
2 0
1013+
4 0

0 commit comments

Comments
 (0)