Skip to content

Commit 66c5fcc

Browse files
committed
fix: inherit field names from left projection in set expressions when missing
Otherwise they might end up having the same column names, which is prohibited. This is only the case for literals, other expression types should have default unique names.
1 parent 37b9a46 commit 66c5fcc

File tree

3 files changed

+176
-3
lines changed

3 files changed

+176
-3
lines changed

datafusion/sql/src/set_expr.rs

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
1919
use datafusion_common::{
20-
DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
20+
DFSchemaRef, DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
2121
};
2222
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
23-
use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier, Spanned};
23+
use sqlparser::ast::{
24+
Expr as SQLExpr, Ident, SelectItem, SetExpr, SetOperator, SetQuantifier, Spanned,
25+
};
2426

2527
impl<S: ContextProvider> SqlToRel<'_, S> {
2628
#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
@@ -42,7 +44,28 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
4244
let left_span = Span::try_from_sqlparser_span(left.span());
4345
let right_span = Span::try_from_sqlparser_span(right.span());
4446
let left_plan = self.set_expr_to_plan(*left, planner_context);
45-
let right_plan = self.set_expr_to_plan(*right, planner_context);
47+
48+
// For non-*ByName operations, add missing aliases to right side using left schema's
49+
// column names. This allows queries like
50+
// `SELECT 1 c1, 0 c2, 0 c3 UNION ALL SELECT 2, 0, 0`
51+
// where the right side has duplicate literal values.
52+
// We only do this if the left side succeeded.
53+
let right = if let Ok(plan) = &left_plan
54+
&& plan.schema().fields().len() > 1
55+
&& matches!(
56+
set_quantifier,
57+
SetQuantifier::All
58+
| SetQuantifier::Distinct
59+
| SetQuantifier::None
60+
) {
61+
alias_set_expr(*right, plan.schema())
62+
} else {
63+
*right
64+
};
65+
66+
let right_plan = self.set_expr_to_plan(right, planner_context);
67+
68+
// Handle errors from both sides, collecting them if both failed
4669
let (left_plan, right_plan) = match (left_plan, right_plan) {
4770
(Ok(left_plan), Ok(right_plan)) => (left_plan, right_plan),
4871
(Err(left_err), Err(right_err)) => {
@@ -160,3 +183,75 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
160183
}
161184
}
162185
}
186+
187+
// Adds aliases to SELECT items in a SetExpr using the provided schema.
188+
// This ensures that unnamed expressions on the right side of a UNION/INTERSECT/EXCEPT
189+
// get aliased with the column names from the left side, allowing queries like
190+
// `SELECT 1 AS a, 0 AS b, 0 AS c UNION ALL SELECT 2, 0, 0` to work correctly.
191+
fn alias_set_expr(set_expr: SetExpr, schema: &DFSchemaRef) -> SetExpr {
192+
match set_expr {
193+
SetExpr::Select(mut select) => {
194+
alias_select_items(&mut select.projection, schema);
195+
SetExpr::Select(select)
196+
}
197+
SetExpr::SetOperation {
198+
op,
199+
left,
200+
right,
201+
set_quantifier,
202+
} => {
203+
// For nested set operations, only alias the leftmost branch
204+
// since that's what determines the output column names
205+
SetExpr::SetOperation {
206+
op,
207+
left: Box::new(alias_set_expr(*left, schema)),
208+
right,
209+
set_quantifier,
210+
}
211+
}
212+
SetExpr::Query(mut query) => {
213+
// Handle parenthesized queries like (SELECT ... UNION ALL SELECT ...)
214+
query.body = Box::new(alias_set_expr(*query.body, schema));
215+
SetExpr::Query(query)
216+
}
217+
// For other cases (Values, etc.), return as-is
218+
other => other,
219+
}
220+
}
221+
222+
// Adds aliases to literal value expressions where missing, based on the input schema, as these are
223+
// the ones that can cause duplicate name issues (e.g. `SELECT 0, 0` has two columns named `Int64(0)`).
224+
// Other expressions typically have unique names.
225+
fn alias_select_items(items: &mut [SelectItem], schema: &DFSchemaRef) {
226+
let mut col_idx = 0;
227+
for item in items.iter_mut() {
228+
match item {
229+
SelectItem::UnnamedExpr(expr) if is_literal_value(expr) => {
230+
if let Some(field) = schema.fields().get(col_idx) {
231+
*item = SelectItem::ExprWithAlias {
232+
expr: expr.clone(),
233+
alias: Ident::new(field.name()),
234+
};
235+
}
236+
col_idx += 1;
237+
}
238+
SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
239+
col_idx += 1;
240+
}
241+
SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => {
242+
// Wildcards expand to multiple columns - skip position tracking
243+
}
244+
}
245+
}
246+
}
247+
248+
/// Returns true if the expression is a literal value that could cause duplicate names.
249+
fn is_literal_value(expr: &SQLExpr) -> bool {
250+
matches!(
251+
expr,
252+
SQLExpr::Value(_)
253+
| SQLExpr::UnaryOp { .. }
254+
| SQLExpr::TypedString { .. }
255+
| SQLExpr::Interval(_)
256+
)
257+
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,6 +2655,60 @@ fn union_all_by_name_same_column_names() {
26552655
);
26562656
}
26572657

2658+
#[test]
2659+
fn union_all_with_duplicate_literals() {
2660+
let sql = "SELECT 0 a, 0 b UNION ALL SELECT 1, 1";
2661+
let plan = logical_plan(sql).unwrap();
2662+
assert_snapshot!(
2663+
plan,
2664+
@r"
2665+
Union
2666+
Projection: Int64(0) AS a, Int64(0) AS b
2667+
EmptyRelation: rows=1
2668+
Projection: Int64(1) AS a, Int64(1) AS b
2669+
EmptyRelation: rows=1
2670+
"
2671+
);
2672+
}
2673+
2674+
#[test]
2675+
fn intersect_with_duplicate_literals() {
2676+
let sql = "SELECT 1 as a, 0 as b, 0 as c INTERSECT SELECT 1, 0, 0";
2677+
let plan = logical_plan(sql).unwrap();
2678+
assert_snapshot!(
2679+
plan,
2680+
@r"
2681+
LeftSemi Join: left.a = right.a, left.b = right.b, left.c = right.c
2682+
Distinct:
2683+
SubqueryAlias: left
2684+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2685+
EmptyRelation: rows=1
2686+
SubqueryAlias: right
2687+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2688+
EmptyRelation: rows=1
2689+
"
2690+
);
2691+
}
2692+
2693+
#[test]
2694+
fn except_with_duplicate_literals() {
2695+
let sql = "SELECT 1 as a, 0 as b, 0 as c EXCEPT SELECT 2, 0, 0";
2696+
let plan = logical_plan(sql).unwrap();
2697+
assert_snapshot!(
2698+
plan,
2699+
@r"
2700+
LeftAnti Join: left.a = right.a, left.b = right.b, left.c = right.c
2701+
Distinct:
2702+
SubqueryAlias: left
2703+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2704+
EmptyRelation: rows=1
2705+
SubqueryAlias: right
2706+
Projection: Int64(2) AS a, Int64(0) AS b, Int64(0) AS c
2707+
EmptyRelation: rows=1
2708+
"
2709+
);
2710+
}
2711+
26582712
#[test]
26592713
fn empty_over() {
26602714
let sql = "SELECT order_id, MAX(order_id) OVER () from orders";

datafusion/sqllogictest/test_files/union.slt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,30 @@ Bob_new
256256
John
257257
John_new
258258

259+
# Test UNION ALL with unaliased duplicate literal values on the right side.
260+
# The second projection will inherit field names from the first one, and so
261+
# pass the unique projection expression name check.
262+
query TII rowsort
263+
SELECT name, 1 as table, 1 as row FROM t1 WHERE id = 1
264+
UNION ALL
265+
SELECT name, 2, 2 FROM t2 WHERE id = 2
266+
----
267+
Alex 1 1
268+
Bob 2 2
269+
270+
# Test nested UNION, EXCEPT, INTERSECT with duplicate unaliased literals
271+
# Only the first SELECT has column aliases, all others have duplicate 0 literals
272+
query III rowsort
273+
SELECT 1 as a, 0 as b, 0 as c
274+
UNION ALL
275+
((SELECT 2, 0, 0 UNION ALL SELECT 3, 0, 0) EXCEPT SELECT 3, 0, 0)
276+
UNION ALL
277+
(SELECT 4, 0, 0 INTERSECT SELECT 4, 0, 0)
278+
----
279+
1 0 0
280+
2 0 0
281+
4 0 0
282+
259283
# Plan is unnested
260284
query TT
261285
EXPLAIN SELECT name FROM t1 UNION ALL (SELECT name from t2 UNION ALL SELECT name || '_new' from t2)

0 commit comments

Comments
 (0)