Skip to content

Commit 01f2d91

Browse files
alambkrinart
andauthored
[branch-53] Restore Sort unparser guard for correct ORDER BY placement (#20658) (#21523)
- Part of #21079 - Closes #20905 on branch-53 This PR: - Backports #21358 from @alamb to the branch-53 line Co-authored-by: Viktor Yershov <viktor@spice.ai>
1 parent 242fb76 commit 01f2d91

File tree

3 files changed

+57
-2
lines changed

3 files changed

+57
-2
lines changed

datafusion/sql/src/unparser/plan.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,17 @@ impl Unparser<'_> {
499499
)
500500
}
501501
LogicalPlan::Sort(sort) => {
502+
// Sort can be top-level plan for derived table
503+
if select.already_projected() {
504+
return self.derive_with_dialect_alias(
505+
"derived_sort",
506+
plan,
507+
relation,
508+
false,
509+
vec![],
510+
);
511+
}
512+
502513
let Some(query_ref) = query else {
503514
return internal_err!(
504515
"Sort operator only valid in a statement context."

datafusion/sql/src/unparser/rewrite.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,15 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields(
223223

224224
let mut collects = p.expr.clone();
225225
for sort in &sort.expr {
226-
collects.push(sort.expr.clone());
226+
// Strip aliases from sort expressions so the comparison matches
227+
// the inner Projection's raw expressions. The optimizer may add
228+
// sort expressions to the inner Projection without aliases, while
229+
// the Sort node's expressions carry aliases from the original plan.
230+
let mut expr = sort.expr.clone();
231+
while let Expr::Alias(alias) = expr {
232+
expr = *alias.expr;
233+
}
234+
collects.push(expr);
227235
}
228236

229237
// Compare outer collects Expr::to_string with inner collected transformed values

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1740,6 +1740,42 @@ fn test_sort_with_push_down_fetch() -> Result<()> {
17401740
Ok(())
17411741
}
17421742

1743+
#[test]
1744+
fn test_sort_with_scalar_fn_and_push_down_fetch() -> Result<()> {
1745+
let schema = Schema::new(vec![
1746+
Field::new("search_phrase", DataType::Utf8, false),
1747+
Field::new("event_time", DataType::Utf8, false),
1748+
]);
1749+
1750+
let substr_udf = unicode::substr();
1751+
1752+
// Build a plan that mimics the DF52 optimizer output:
1753+
// Projection(search_phrase) → Sort(substr(event_time), fetch=10)
1754+
// → Projection(search_phrase, event_time) → Filter → TableScan
1755+
// This triggers a subquery because the outer projection differs from the inner one.
1756+
// The ORDER BY scalar function must not reference the inner table qualifier.
1757+
let plan = table_scan(Some("t1"), &schema, None)?
1758+
.filter(col("search_phrase").not_eq(lit("")))?
1759+
.project(vec![col("search_phrase"), col("event_time")])?
1760+
.sort_with_limit(
1761+
vec![
1762+
substr_udf
1763+
.call(vec![col("event_time"), lit(1), lit(5)])
1764+
.sort(true, true),
1765+
],
1766+
Some(10),
1767+
)?
1768+
.project(vec![col("search_phrase")])?
1769+
.build()?;
1770+
1771+
let sql = plan_to_sql(&plan)?;
1772+
assert_snapshot!(
1773+
sql,
1774+
@"SELECT t1.search_phrase FROM (SELECT t1.search_phrase, t1.event_time FROM t1 WHERE (t1.search_phrase <> '') ORDER BY substr(t1.event_time, 1, 5) ASC NULLS FIRST LIMIT 10)"
1775+
);
1776+
Ok(())
1777+
}
1778+
17431779
#[test]
17441780
fn test_join_with_table_scan_filters() -> Result<()> {
17451781
let schema_left = Schema::new(vec![
@@ -1984,7 +2020,7 @@ fn test_complex_order_by_with_grouping() -> Result<()> {
19842020
}, {
19852021
assert_snapshot!(
19862022
sql,
1987-
@r#"SELECT j1.j1_id, j1.j1_string, lochierarchy FROM (SELECT j1.j1_id, j1.j1_string, (grouping(j1.j1_id) + grouping(j1.j1_string)) AS lochierarchy, grouping(j1.j1_string), grouping(j1.j1_id) FROM j1 GROUP BY ROLLUP (j1.j1_id, j1.j1_string)) ORDER BY lochierarchy DESC NULLS FIRST, CASE WHEN (("grouping(j1.j1_id)" + "grouping(j1.j1_string)") = 0) THEN j1.j1_id END ASC NULLS LAST LIMIT 100"#
2023+
@"SELECT j1.j1_id, j1.j1_string, lochierarchy FROM (SELECT j1.j1_id, j1.j1_string, (grouping(j1.j1_id) + grouping(j1.j1_string)) AS lochierarchy, grouping(j1.j1_string), grouping(j1.j1_id) FROM j1 GROUP BY ROLLUP (j1.j1_id, j1.j1_string) ORDER BY lochierarchy DESC NULLS FIRST, CASE WHEN ((grouping(j1.j1_id) + grouping(j1.j1_string)) = 0) THEN j1.j1_id END ASC NULLS LAST) LIMIT 100"
19882024
);
19892025
});
19902026

0 commit comments

Comments
 (0)