Skip to content

Commit 4b68b2e

Browse files
committed
fix: inherit field names from left projection in set expressions when missing
Otherwise they might end up having the same column names, which is prohibited. This is only the case for literals, other expression types should have default unique names.
1 parent 37b9a46 commit 4b68b2e

File tree

3 files changed

+242
-3
lines changed

3 files changed

+242
-3
lines changed

datafusion/sql/src/set_expr.rs

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
1919
use datafusion_common::{
20-
DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
20+
DFSchemaRef, DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
2121
};
2222
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
23-
use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier, Spanned};
23+
use sqlparser::ast::{
24+
Expr as SQLExpr, Ident, SelectItem, SetExpr, SetOperator, SetQuantifier, Spanned,
25+
};
2426

2527
impl<S: ContextProvider> SqlToRel<'_, S> {
2628
#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
@@ -42,7 +44,28 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
4244
let left_span = Span::try_from_sqlparser_span(left.span());
4345
let right_span = Span::try_from_sqlparser_span(right.span());
4446
let left_plan = self.set_expr_to_plan(*left, planner_context);
45-
let right_plan = self.set_expr_to_plan(*right, planner_context);
47+
48+
// For non-*ByName operations, add missing aliases to right side using left schema's
49+
// column names. This allows queries like
50+
// `SELECT 1 c1, 0 c2, 0 c3 UNION ALL SELECT 2, 0, 0`
51+
// where the right side has duplicate literal values.
52+
// We only do this if the left side succeeded.
53+
let right = if let Ok(plan) = &left_plan
54+
&& plan.schema().fields().len() > 1
55+
&& matches!(
56+
set_quantifier,
57+
SetQuantifier::All
58+
| SetQuantifier::Distinct
59+
| SetQuantifier::None
60+
) {
61+
alias_set_expr(*right, plan.schema())
62+
} else {
63+
*right
64+
};
65+
66+
let right_plan = self.set_expr_to_plan(right, planner_context);
67+
68+
// Handle errors from both sides, collecting them if both failed
4669
let (left_plan, right_plan) = match (left_plan, right_plan) {
4770
(Ok(left_plan), Ok(right_plan)) => (left_plan, right_plan),
4871
(Err(left_err), Err(right_err)) => {
@@ -160,3 +183,75 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
160183
}
161184
}
162185
}
186+
187+
// Adds aliases to SELECT items in a SetExpr using the provided schema.
188+
// This ensures that unnamed expressions on the right side of a UNION/INTERSECT/EXCEPT
189+
// get aliased with the column names from the left side, allowing queries like
190+
// `SELECT 1 AS a, 0 AS b, 0 AS c UNION ALL SELECT 2, 0, 0` to work correctly.
191+
fn alias_set_expr(set_expr: SetExpr, schema: &DFSchemaRef) -> SetExpr {
192+
match set_expr {
193+
SetExpr::Select(mut select) => {
194+
alias_select_items(&mut select.projection, schema);
195+
SetExpr::Select(select)
196+
}
197+
SetExpr::SetOperation {
198+
op,
199+
left,
200+
right,
201+
set_quantifier,
202+
} => {
203+
// For nested set operations, only alias the leftmost branch
204+
// since that's what determines the output column names
205+
SetExpr::SetOperation {
206+
op,
207+
left: Box::new(alias_set_expr(*left, schema)),
208+
right,
209+
set_quantifier,
210+
}
211+
}
212+
SetExpr::Query(mut query) => {
213+
// Handle parenthesized queries like (SELECT ... UNION ALL SELECT ...)
214+
query.body = Box::new(alias_set_expr(*query.body, schema));
215+
SetExpr::Query(query)
216+
}
217+
// For other cases (Values, etc.), return as-is
218+
other => other,
219+
}
220+
}
221+
222+
// Adds aliases to literal value expressions where missing, based on the input schema, as these are
223+
// the ones that can cause duplicate name issues (e.g. `SELECT 0, 0` has two columns named `Int64(0)`).
224+
// Other expressions typically have unique names.
225+
fn alias_select_items(items: &mut [SelectItem], schema: &DFSchemaRef) {
226+
let mut col_idx = 0;
227+
for item in items.iter_mut() {
228+
match item {
229+
SelectItem::UnnamedExpr(expr) if is_literal_value(expr) => {
230+
if let Some(field) = schema.fields().get(col_idx) {
231+
*item = SelectItem::ExprWithAlias {
232+
expr: expr.clone(),
233+
alias: Ident::new(field.name()),
234+
};
235+
}
236+
col_idx += 1;
237+
}
238+
SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
239+
col_idx += 1;
240+
}
241+
SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => {
242+
// Wildcards expand to multiple columns - skip position tracking
243+
}
244+
}
245+
}
246+
}
247+
248+
/// Returns true if the expression is a literal value that could cause duplicate names.
249+
fn is_literal_value(expr: &SQLExpr) -> bool {
250+
matches!(
251+
expr,
252+
SQLExpr::Value(_)
253+
| SQLExpr::UnaryOp { .. }
254+
| SQLExpr::TypedString { .. }
255+
| SQLExpr::Interval(_)
256+
)
257+
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,6 +2655,115 @@ fn union_all_by_name_same_column_names() {
26552655
);
26562656
}
26572657

2658+
#[test]
2659+
fn union_all_with_duplicate_literals() {
2660+
let sql = "SELECT 1 as one, 0 as two, 0 as three UNION ALL SELECT 2, 0, 0";
2661+
let plan = logical_plan(sql).unwrap();
2662+
assert_snapshot!(
2663+
plan,
2664+
@r"
2665+
Union
2666+
Projection: Int64(1) AS one, Int64(0) AS two, Int64(0) AS three
2667+
EmptyRelation: rows=1
2668+
Projection: Int64(2) AS one, Int64(0) AS two, Int64(0) AS three
2669+
EmptyRelation: rows=1
2670+
"
2671+
);
2672+
}
2673+
2674+
#[test]
2675+
fn union_with_duplicate_literals() {
2676+
let sql = "SELECT 1 as a, 2 as b UNION SELECT 3, 3";
2677+
let plan = logical_plan(sql).unwrap();
2678+
assert_snapshot!(
2679+
plan,
2680+
@r"
2681+
Distinct:
2682+
Union
2683+
Projection: Int64(1) AS a, Int64(2) AS b
2684+
EmptyRelation: rows=1
2685+
Projection: Int64(3) AS a, Int64(3) AS b
2686+
EmptyRelation: rows=1
2687+
"
2688+
);
2689+
}
2690+
2691+
#[test]
2692+
fn intersect_with_duplicate_literals() {
2693+
let sql = "SELECT 1 as a, 0 as b, 0 as c INTERSECT SELECT 1, 0, 0";
2694+
let plan = logical_plan(sql).unwrap();
2695+
assert_snapshot!(
2696+
plan,
2697+
@r"
2698+
LeftSemi Join: left.a = right.a, left.b = right.b, left.c = right.c
2699+
Distinct:
2700+
SubqueryAlias: left
2701+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2702+
EmptyRelation: rows=1
2703+
SubqueryAlias: right
2704+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2705+
EmptyRelation: rows=1
2706+
"
2707+
);
2708+
}
2709+
2710+
#[test]
2711+
fn except_with_duplicate_literals() {
2712+
let sql = "SELECT 1 as a, 0 as b, 0 as c EXCEPT SELECT 2, 0, 0";
2713+
let plan = logical_plan(sql).unwrap();
2714+
assert_snapshot!(
2715+
plan,
2716+
@r"
2717+
LeftAnti Join: left.a = right.a, left.b = right.b, left.c = right.c
2718+
Distinct:
2719+
SubqueryAlias: left
2720+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2721+
EmptyRelation: rows=1
2722+
SubqueryAlias: right
2723+
Projection: Int64(2) AS a, Int64(0) AS b, Int64(0) AS c
2724+
EmptyRelation: rows=1
2725+
"
2726+
);
2727+
}
2728+
2729+
#[test]
2730+
fn nested_union_with_duplicate_literals() {
2731+
let sql = "SELECT 'a' as x, 1 as y UNION ALL SELECT 'b', 1 UNION ALL SELECT 'c', 1";
2732+
let plan = logical_plan(sql).unwrap();
2733+
assert_snapshot!(
2734+
plan,
2735+
@r#"
2736+
Union
2737+
Union
2738+
Projection: Utf8("a") AS x, Int64(1) AS y
2739+
EmptyRelation: rows=1
2740+
Projection: Utf8("b") AS x, Int64(1) AS y
2741+
EmptyRelation: rows=1
2742+
Projection: Utf8("c") AS x, Int64(1) AS y
2743+
EmptyRelation: rows=1
2744+
"#
2745+
);
2746+
}
2747+
2748+
#[test]
2749+
fn right_nested_union_with_duplicate_literals() {
2750+
let sql = "SELECT 1 as a, 0 as b UNION ALL (SELECT 2, 0 UNION ALL SELECT 3, 0)";
2751+
let plan = logical_plan(sql).unwrap();
2752+
assert_snapshot!(
2753+
plan,
2754+
@r"
2755+
Union
2756+
Projection: Int64(1) AS a, Int64(0) AS b
2757+
EmptyRelation: rows=1
2758+
Union
2759+
Projection: Int64(2) AS a, Int64(0) AS b
2760+
EmptyRelation: rows=1
2761+
Projection: Int64(3) AS a, Int64(0) AS b
2762+
EmptyRelation: rows=1
2763+
"
2764+
);
2765+
}
2766+
26582767
#[test]
26592768
fn empty_over() {
26602769
let sql = "SELECT order_id, MAX(order_id) OVER () from orders";

datafusion/sqllogictest/test_files/union.slt

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -976,3 +976,38 @@ query TITT
976976
END
977977
----
978978
ns3 4 t3 m
979+
980+
# Test UNION ALL with duplicate literal values on the right side
981+
# The right side has duplicate 0 literals which should inherit column names from the left
982+
query III rowsort
983+
SELECT 1 as a, 0 as b, 0 as c
984+
UNION ALL
985+
SELECT 2, 0, 0
986+
----
987+
1 0 0
988+
2 0 0
989+
990+
# Test nested UNION ALL with duplicate literals
991+
query II rowsort
992+
SELECT 1 as a, 0 as b
993+
UNION ALL
994+
SELECT 2, 0
995+
UNION ALL
996+
SELECT 3, 0
997+
----
998+
1 0
999+
2 0
1000+
3 0
1001+
1002+
# Test mixed set operations with nested EXCEPT and INTERSECT
1003+
# Only the first SELECT has named columns, the rest inherit the schema
1004+
query II rowsort
1005+
SELECT 1 as a, 0 as b
1006+
UNION ALL
1007+
(SELECT 2, 0 EXCEPT SELECT 3, 0)
1008+
UNION ALL
1009+
(SELECT 4, 0 INTERSECT SELECT 4, 0)
1010+
----
1011+
1 0
1012+
2 0
1013+
4 0

0 commit comments

Comments
 (0)