Skip to content

Commit 15cf2d7

Browse files
committed
fix: inherit field names from left projection in set expressions when missing
Otherwise they might end up having the same column names, which is prohibited. This is only the case for literals, other expression types should have default unique names.
1 parent 37b9a46 commit 15cf2d7

File tree

3 files changed

+218
-3
lines changed

3 files changed

+218
-3
lines changed

datafusion/sql/src/set_expr.rs

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
1919
use datafusion_common::{
20-
DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
20+
DFSchemaRef, DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err,
2121
};
2222
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
23-
use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier, Spanned};
23+
use sqlparser::ast::{
24+
Expr as SQLExpr, Ident, SelectItem, SetExpr, SetOperator, SetQuantifier, Spanned,
25+
};
2426

2527
impl<S: ContextProvider> SqlToRel<'_, S> {
2628
#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
@@ -42,7 +44,28 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
4244
let left_span = Span::try_from_sqlparser_span(left.span());
4345
let right_span = Span::try_from_sqlparser_span(right.span());
4446
let left_plan = self.set_expr_to_plan(*left, planner_context);
45-
let right_plan = self.set_expr_to_plan(*right, planner_context);
47+
48+
// For non-*ByName operations, add missing aliases to right side using left schema's
49+
// column names. This allows queries like
50+
// `SELECT 1 c1, 0 c2, 0 c3 UNION ALL SELECT 2, 0, 0`
51+
// where the right side has duplicate literal values.
52+
// We only do this if the left side succeeded.
53+
let right = if let Ok(plan) = &left_plan
54+
&& plan.schema().fields().len() > 1
55+
&& matches!(
56+
set_quantifier,
57+
SetQuantifier::All
58+
| SetQuantifier::Distinct
59+
| SetQuantifier::None
60+
) {
61+
alias_set_expr(*right, plan.schema())
62+
} else {
63+
*right
64+
};
65+
66+
let right_plan = self.set_expr_to_plan(right, planner_context);
67+
68+
// Handle errors from both sides, collecting them if both failed
4669
let (left_plan, right_plan) = match (left_plan, right_plan) {
4770
(Ok(left_plan), Ok(right_plan)) => (left_plan, right_plan),
4871
(Err(left_err), Err(right_err)) => {
@@ -160,3 +183,75 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
160183
}
161184
}
162185
}
186+
187+
// Adds aliases to SELECT items in a SetExpr using the provided schema.
188+
// This ensures that unnamed expressions on the right side of a UNION/INTERSECT/EXCEPT
189+
// get aliased with the column names from the left side, allowing queries like
190+
// `SELECT 1 AS a, 0 AS b, 0 AS c UNION ALL SELECT 2, 0, 0` to work correctly.
191+
fn alias_set_expr(set_expr: SetExpr, schema: &DFSchemaRef) -> SetExpr {
192+
match set_expr {
193+
SetExpr::Select(mut select) => {
194+
alias_select_items(&mut select.projection, schema);
195+
SetExpr::Select(select)
196+
}
197+
SetExpr::SetOperation {
198+
op,
199+
left,
200+
right,
201+
set_quantifier,
202+
} => {
203+
// For nested set operations, only alias the leftmost branch
204+
// since that's what determines the output column names
205+
SetExpr::SetOperation {
206+
op,
207+
left: Box::new(alias_set_expr(*left, schema)),
208+
right,
209+
set_quantifier,
210+
}
211+
}
212+
SetExpr::Query(mut query) => {
213+
// Handle parenthesized queries like (SELECT ... UNION ALL SELECT ...)
214+
query.body = Box::new(alias_set_expr(*query.body, schema));
215+
SetExpr::Query(query)
216+
}
217+
// For other cases (Values, etc.), return as-is
218+
other => other,
219+
}
220+
}
221+
222+
// Adds aliases to literal value expressions where missing, based on the input schema, as these are
223+
// the ones that can cause duplicate name issues (e.g. `SELECT 0, 0` has two columns named `Int64(0)`).
224+
// Other expressions typically have unique names.
225+
fn alias_select_items(items: &mut [SelectItem], schema: &DFSchemaRef) {
226+
let mut col_idx = 0;
227+
for item in items.iter_mut() {
228+
match item {
229+
SelectItem::UnnamedExpr(expr) if is_literal_value(expr) => {
230+
if let Some(field) = schema.fields().get(col_idx) {
231+
*item = SelectItem::ExprWithAlias {
232+
expr: expr.clone(),
233+
alias: Ident::new(field.name()),
234+
};
235+
}
236+
col_idx += 1;
237+
}
238+
SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
239+
col_idx += 1;
240+
}
241+
SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => {
242+
// Wildcards expand to multiple columns - skip position tracking
243+
}
244+
}
245+
}
246+
}
247+
248+
/// Returns true if the expression is a literal value that could cause duplicate names.
249+
fn is_literal_value(expr: &SQLExpr) -> bool {
250+
matches!(
251+
expr,
252+
SQLExpr::Value(_)
253+
| SQLExpr::UnaryOp { .. }
254+
| SQLExpr::TypedString { .. }
255+
| SQLExpr::Interval(_)
256+
)
257+
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,6 +2655,115 @@ fn union_all_by_name_same_column_names() {
26552655
);
26562656
}
26572657

2658+
#[test]
2659+
fn union_all_with_duplicate_literals() {
2660+
let sql = "SELECT 1 as one, 0 as two, 0 as three UNION ALL SELECT 2, 0, 0";
2661+
let plan = logical_plan(sql).unwrap();
2662+
assert_snapshot!(
2663+
plan,
2664+
@r"
2665+
Union
2666+
Projection: Int64(1) AS one, Int64(0) AS two, Int64(0) AS three
2667+
EmptyRelation: rows=1
2668+
Projection: Int64(2) AS one, Int64(0) AS two, Int64(0) AS three
2669+
EmptyRelation: rows=1
2670+
"
2671+
);
2672+
}
2673+
2674+
#[test]
2675+
fn union_with_duplicate_literals() {
2676+
let sql = "SELECT 1 as a, 2 as b UNION SELECT 3, 3";
2677+
let plan = logical_plan(sql).unwrap();
2678+
assert_snapshot!(
2679+
plan,
2680+
@r"
2681+
Distinct:
2682+
Union
2683+
Projection: Int64(1) AS a, Int64(2) AS b
2684+
EmptyRelation: rows=1
2685+
Projection: Int64(3) AS a, Int64(3) AS b
2686+
EmptyRelation: rows=1
2687+
"
2688+
);
2689+
}
2690+
2691+
#[test]
2692+
fn intersect_with_duplicate_literals() {
2693+
let sql = "SELECT 1 as a, 0 as b, 0 as c INTERSECT SELECT 1, 0, 0";
2694+
let plan = logical_plan(sql).unwrap();
2695+
assert_snapshot!(
2696+
plan,
2697+
@r"
2698+
LeftSemi Join: left.a = right.a, left.b = right.b, left.c = right.c
2699+
Distinct:
2700+
SubqueryAlias: left
2701+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2702+
EmptyRelation: rows=1
2703+
SubqueryAlias: right
2704+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2705+
EmptyRelation: rows=1
2706+
"
2707+
);
2708+
}
2709+
2710+
#[test]
2711+
fn except_with_duplicate_literals() {
2712+
let sql = "SELECT 1 as a, 0 as b, 0 as c EXCEPT SELECT 2, 0, 0";
2713+
let plan = logical_plan(sql).unwrap();
2714+
assert_snapshot!(
2715+
plan,
2716+
@r"
2717+
LeftAnti Join: left.a = right.a, left.b = right.b, left.c = right.c
2718+
Distinct:
2719+
SubqueryAlias: left
2720+
Projection: Int64(1) AS a, Int64(0) AS b, Int64(0) AS c
2721+
EmptyRelation: rows=1
2722+
SubqueryAlias: right
2723+
Projection: Int64(2) AS a, Int64(0) AS b, Int64(0) AS c
2724+
EmptyRelation: rows=1
2725+
"
2726+
);
2727+
}
2728+
2729+
#[test]
2730+
fn nested_union_with_duplicate_literals() {
2731+
let sql = "SELECT 'a' as x, 1 as y UNION ALL SELECT 'b', 1 UNION ALL SELECT 'c', 1";
2732+
let plan = logical_plan(sql).unwrap();
2733+
assert_snapshot!(
2734+
plan,
2735+
@r#"
2736+
Union
2737+
Union
2738+
Projection: Utf8("a") AS x, Int64(1) AS y
2739+
EmptyRelation: rows=1
2740+
Projection: Utf8("b") AS x, Int64(1) AS y
2741+
EmptyRelation: rows=1
2742+
Projection: Utf8("c") AS x, Int64(1) AS y
2743+
EmptyRelation: rows=1
2744+
"#
2745+
);
2746+
}
2747+
2748+
#[test]
2749+
fn right_nested_union_with_duplicate_literals() {
2750+
let sql = "SELECT 1 as a, 0 as b UNION ALL (SELECT 2, 0 UNION ALL SELECT 3, 0)";
2751+
let plan = logical_plan(sql).unwrap();
2752+
assert_snapshot!(
2753+
plan,
2754+
@r"
2755+
Union
2756+
Projection: Int64(1) AS a, Int64(0) AS b
2757+
EmptyRelation: rows=1
2758+
Union
2759+
Projection: Int64(2) AS a, Int64(0) AS b
2760+
EmptyRelation: rows=1
2761+
Projection: Int64(3) AS a, Int64(0) AS b
2762+
EmptyRelation: rows=1
2763+
"
2764+
);
2765+
}
2766+
26582767
#[test]
26592768
fn empty_over() {
26602769
let sql = "SELECT order_id, MAX(order_id) OVER () from orders";

datafusion/sqllogictest/test_files/union.slt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,17 @@ Bob_new
256256
John
257257
John_new
258258

259+
# Test UNION ALL with unaliased duplicate literal values on the right side.
260+
# The second projection will inherit field names from the first one, and so
261+
# pass the unique projection expression name check.
262+
query TII rowsort
263+
SELECT name, 1 as table, 1 as row FROM t1 WHERE id = 1
264+
UNION ALL
265+
SELECT name, 2, 2 FROM t2 WHERE id = 2
266+
----
267+
Alex 1 1
268+
Bob 2 2
269+
259270
# Plan is unnested
260271
query TT
261272
EXPLAIN SELECT name FROM t1 UNION ALL (SELECT name from t2 UNION ALL SELECT name || '_new' from t2)

0 commit comments

Comments
 (0)