Skip to content

Commit 3fdd4f5

Browse files
committed
backport fix for issue apache#19049
1 parent 8ec1a50 commit 3fdd4f5

File tree

2 files changed

+96
-7
lines changed

2 files changed

+96
-7
lines changed

datafusion/expr/src/expr.rs

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -506,17 +506,26 @@ pub type SchemaFieldMetadata = std::collections::HashMap<String, String>;
506506
pub fn intersect_metadata_for_union<'a>(
507507
metadatas: impl IntoIterator<Item = &'a SchemaFieldMetadata>,
508508
) -> SchemaFieldMetadata {
509-
let mut metadatas = metadatas.into_iter();
510-
let Some(mut intersected) = metadatas.next().cloned() else {
511-
return Default::default();
512-
};
509+
let mut intersected: Option<SchemaFieldMetadata> = None;
513510

514511
for metadata in metadatas {
515-
// Only keep keys that exist in both with the same value
516-
intersected.retain(|k, v| metadata.get(k) == Some(v));
512+
// Skip empty metadata (e.g. from NULL literals or computed expressions)
513+
// to avoid dropping metadata from branches that have it.
514+
if metadata.is_empty() {
515+
continue;
516+
}
517+
match &mut intersected {
518+
None => {
519+
intersected = Some(metadata.clone());
520+
}
521+
Some(current) => {
522+
// Only keep keys that exist in both with the same value
523+
current.retain(|k, v| metadata.get(k) == Some(v));
524+
}
525+
}
517526
}
518527

519-
intersected
528+
intersected.unwrap_or_default()
520529
}
521530

522531
/// UNNEST expression.
@@ -3997,4 +4006,67 @@ mod test {
39974006
}
39984007
}
39994008
}
4009+
4010+
mod intersect_metadata_tests {
4011+
use super::super::intersect_metadata_for_union;
4012+
use std::collections::HashMap;
4013+
4014+
#[test]
4015+
fn all_branches_same_metadata() {
4016+
let m1 = HashMap::from([("key".into(), "val".into())]);
4017+
let m2 = HashMap::from([("key".into(), "val".into())]);
4018+
let result = intersect_metadata_for_union([&m1, &m2]);
4019+
assert_eq!(result, HashMap::from([("key".into(), "val".into())]));
4020+
}
4021+
4022+
#[test]
4023+
fn conflicting_metadata_dropped() {
4024+
let m1 = HashMap::from([("key".into(), "a".into())]);
4025+
let m2 = HashMap::from([("key".into(), "b".into())]);
4026+
let result = intersect_metadata_for_union([&m1, &m2]);
4027+
assert!(result.is_empty());
4028+
}
4029+
4030+
#[test]
4031+
fn empty_metadata_branch_skipped() {
4032+
let m1 = HashMap::from([("key".into(), "val".into())]);
4033+
let m2 = HashMap::new(); // e.g. NULL literal
4034+
let result = intersect_metadata_for_union([&m1, &m2]);
4035+
assert_eq!(result, HashMap::from([("key".into(), "val".into())]));
4036+
}
4037+
4038+
#[test]
4039+
fn empty_metadata_first_branch_skipped() {
4040+
let m1 = HashMap::new();
4041+
let m2 = HashMap::from([("key".into(), "val".into())]);
4042+
let result = intersect_metadata_for_union([&m1, &m2]);
4043+
assert_eq!(result, HashMap::from([("key".into(), "val".into())]));
4044+
}
4045+
4046+
#[test]
4047+
fn all_branches_empty_metadata() {
4048+
let m1: HashMap<String, String> = HashMap::new();
4049+
let m2: HashMap<String, String> = HashMap::new();
4050+
let result = intersect_metadata_for_union([&m1, &m2]);
4051+
assert!(result.is_empty());
4052+
}
4053+
4054+
#[test]
4055+
fn mixed_empty_and_conflicting() {
4056+
let m1 = HashMap::from([("key".into(), "a".into())]);
4057+
let m2 = HashMap::new();
4058+
let m3 = HashMap::from([("key".into(), "b".into())]);
4059+
let result = intersect_metadata_for_union([&m1, &m2, &m3]);
4060+
// m2 is skipped; m1 and m3 conflict → dropped
4061+
assert!(result.is_empty());
4062+
}
4063+
4064+
#[test]
4065+
fn no_inputs() {
4066+
let result = intersect_metadata_for_union(std::iter::empty::<
4067+
&HashMap<String, String>,
4068+
>());
4069+
assert!(result.is_empty());
4070+
}
4071+
}
40004072
}

datafusion/sqllogictest/test_files/metadata.slt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,23 @@ ORDER BY id, name, l_name;
123123
NULL bar NULL
124124
NULL NULL l_bar
125125

126+
# Regression test: UNION ALL + optimize_projections pruning unused columns causes
127+
# metadata loss when one branch has NULL literals (empty metadata) and the other
128+
# has field metadata. The optimizer prunes unused columns, triggering recompute_schema
129+
# which rebuilds the Union schema via intersect_metadata_for_union. Previously, this
130+
# intersection would drop metadata when any branch had empty metadata (from NULL literals).
131+
# See https://github.com/apache/datafusion/issues/19049
132+
query T
133+
SELECT name FROM (
134+
SELECT id, name FROM "table_with_metadata"
135+
UNION ALL
136+
SELECT NULL::int as id, NULL::string as name
137+
) GROUP BY name ORDER BY name;
138+
----
139+
bar
140+
baz
141+
NULL
142+
126143
# Regression test: missing field metadata from left side of the union when right side is chosen
127144
query T
128145
select name from (

0 commit comments

Comments
 (0)