Skip to content

Commit ed46d9d

Browse files
fix: Unparser drops column alias needed by ORDER BY when flattening Projection
In `rewrite_plan_for_sort_on_non_projected_fields`, when the outer Projection excludes a Sort column that was defined as an alias in the inner Projection (e.g. `Z AS c`), the rewrite replaced the inner Projection's expressions with only the outer Projection's mapped expressions, dropping the alias definition. This left ORDER BY referencing a non-existent column. The fix inlines the underlying physical expression into the Sort when an alias is dropped (e.g. `ORDER BY c` → `ORDER BY t."Z"`). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ab8acf9 commit ed46d9d

2 files changed

Lines changed: 94 additions & 0 deletions

File tree

datafusion/sql/src/unparser/rewrite.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,46 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields(
254254
.map(|e| map.get(e).unwrap_or(e).clone())
255255
.collect::<Vec<_>>();
256256

257+
// Build a reverse map from alias name → underlying expression for
258+
// sort columns that are dropped from the outer projection. When
259+
// the inner Projection is trimmed to `new_exprs` below, any alias
260+
// that only existed in the inner Projection disappears, so ORDER BY
261+
// references to it become dangling. We inline the physical
262+
// expression instead (e.g. ORDER BY "c" → ORDER BY "Z").
263+
let projected_aliases: HashSet<&str> = new_exprs
264+
.iter()
265+
.filter_map(|e| match e {
266+
Expr::Alias(alias) => Some(alias.name.as_str()),
267+
_ => None,
268+
})
269+
.collect();
270+
271+
let alias_to_underlying: HashMap<String, Expr> = inner_p
272+
.expr
273+
.iter()
274+
.filter_map(|e| match e {
275+
Expr::Alias(alias) if !projected_aliases.contains(alias.name.as_str()) => {
276+
Some((alias.name.clone(), (*alias.expr).clone()))
277+
}
278+
_ => None,
279+
})
280+
.collect();
281+
282+
if !alias_to_underlying.is_empty() {
283+
for sort_expr in &mut sort.expr {
284+
let mut expr = sort_expr.expr.clone();
285+
while let Expr::Alias(alias) = expr {
286+
expr = *alias.expr;
287+
}
288+
if let Expr::Column(ref col) = expr {
289+
let name = col.name();
290+
if let Some(underlying) = alias_to_underlying.get(name) {
291+
sort_expr.expr = underlying.clone();
292+
}
293+
}
294+
}
295+
}
296+
257297
inner_p.expr.clone_from(&new_exprs);
258298
sort.input = Arc::new(LogicalPlan::Projection(inner_p));
259299

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2973,3 +2973,57 @@ fn test_unparse_manual_join_with_subquery_aggregate() -> Result<()> {
29732973

29742974
Ok(())
29752975
}
2976+
2977+
/// Regression test: when the outer Projection excludes a Sort column that was
2978+
/// defined as an alias in the inner Projection (through a SubqueryAlias), the
2979+
/// Unparser must either preserve the subquery boundary or inline the physical
2980+
/// column name into the ORDER BY clause.
2981+
///
2982+
/// See: https://github.com/apache/datafusion/issues/XXXX
2983+
#[test]
2984+
fn test_sort_on_aliased_column_dropped_by_outer_projection() -> Result<()> {
2985+
let schema = Schema::new(vec![
2986+
Field::new("X", DataType::Utf8, true),
2987+
Field::new("Y", DataType::Utf8, true),
2988+
Field::new("Z", DataType::Utf8, true),
2989+
]);
2990+
2991+
// Build:
2992+
// Projection: [a, b] -- outer: excludes sort column "c"
2993+
// Sort: [c DESC, fetch=1] -- references alias "c"
2994+
// Projection: [X AS a, Y AS b, Z AS c] -- defines alias "c"
2995+
// SubqueryAlias: t
2996+
// TableScan: phys_table [X, Y, Z]
2997+
let plan = table_scan(Some("phys_table"), &schema, None)?
2998+
.alias("t")?
2999+
.project(vec![
3000+
Expr::Column(Column::new(Some(TableReference::bare("t")), "X"))
3001+
.alias("a"),
3002+
Expr::Column(Column::new(Some(TableReference::bare("t")), "Y"))
3003+
.alias("b"),
3004+
Expr::Column(Column::new(Some(TableReference::bare("t")), "Z"))
3005+
.alias("c"),
3006+
])?
3007+
.sort_with_limit(
3008+
vec![Expr::Column(Column::new_unqualified("c")).sort(false, true)],
3009+
Some(1),
3010+
)?
3011+
.project(vec![
3012+
Expr::Column(Column::new_unqualified("a")),
3013+
Expr::Column(Column::new_unqualified("b")),
3014+
])?
3015+
.build()?;
3016+
3017+
let unparser = Unparser::default();
3018+
let sql = unparser.plan_to_sql(&plan)?;
3019+
3020+
// The sort column "c" (aliased from "Z" in the inner Projection) must be
3021+
// inlined to the physical column in ORDER BY, since the outer Projection
3022+
// excludes it from the SELECT list.
3023+
assert_snapshot!(
3024+
sql,
3025+
@r#"SELECT t."X" AS a, t."Y" AS b FROM phys_table AS t ORDER BY t."Z" DESC NULLS FIRST LIMIT 1"#
3026+
);
3027+
3028+
Ok(())
3029+
}

0 commit comments

Comments
 (0)