Skip to content

Commit 985b19a

Browse files
[VL] Fix native Union result name (#11832)
Fix the native union result using the column type name as the column name, which caused columns of the same data type to carry the same data and therefore produced an incorrect result; e.g., all string columns had the same values as the first string column. `const auto name = outRowType->childAt(colIdx)->name();` yields the column type name => wrong. `const auto name = outRowType->nameOf(colIdx);` yields the column name => correct. Co-authored-by: lifulong <lifulong@zhihu.com>
1 parent f350a44 commit 985b19a

2 files changed

Lines changed: 51 additions & 1 deletion

File tree

backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,55 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
582582
}
583583
}
584584

585+
test("native union_all with two level union keeps distinct output columns") {
586+
withTempView("union_src_a", "union_src_b", "union_src_c") {
587+
Seq(
588+
("valueA", "value1", "value11", "value111"),
589+
("valueA", "value2", "value22", "value222")
590+
).toDF("col1", "col2", "col3", "col4")
591+
.createOrReplaceTempView("union_src_a")
592+
Seq(
593+
("valueB", "value3", "value33", "value333"),
594+
("valueB", "value4", "value44", "value444")
595+
).toDF("col1", "col2", "col3", "col4")
596+
.createOrReplaceTempView("union_src_b")
597+
598+
withSQLConf(GlutenConfig.NATIVE_UNION_ENABLED.key -> "true") {
599+
compareDfResultsAgainstVanillaSpark(
600+
() =>
601+
spark.sql("""
602+
|with deduplicated_data as (
603+
| select col1, col2, col3, col4
604+
| from (
605+
| select
606+
| u.col1,
607+
| u.col2,
608+
| u.col3,
609+
| u.col4,
610+
| row_number() over (partition by u.col2 order by u.col5 desc) as rn
611+
| from (
612+
| select col1, col2, col3, col4, 98 as col5 from union_src_a
613+
| union all
614+
| select col1, col2, col3, col4, 100 as col5 from union_src_b
615+
| ) u
616+
| ) t
617+
| where t.rn = 1
618+
|)
619+
|select col1, col2, col3, col4
620+
|from deduplicated_data
621+
|where col1 != 'valueC'
622+
|union all
623+
|select col1, col2, col3, col4
624+
|from deduplicated_data
625+
|where col1 = 'valueC'
626+
|""".stripMargin),
627+
compareResult = true,
628+
checkGlutenPlan[UnionExecTransformer]
629+
)
630+
}
631+
}
632+
}
633+
585634
test("union two tables") {
586635
runQueryAndCompare("""
587636
|select count(orderkey) from (

cpp/velox/substrait/SubstraitToVeloxPlan.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1244,7 +1244,8 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
12441244
const RowTypePtr outRowType = asRowType(children[0]->outputType());
12451245
std::vector<std::string> outNames;
12461246
for (int32_t colIdx = 0; colIdx < outRowType->size(); ++colIdx) {
1247-
const auto name = outRowType->childAt(colIdx)->name();
1247+
// Use the field names from the unified output row type instead of the child type names.
1248+
const auto name = outRowType->nameOf(colIdx);
12481249
outNames.push_back(name);
12491250
}
12501251

0 commit comments

Comments
 (0)