From 76150b278442941cebf20f2ff3593baae5df2581 Mon Sep 17 00:00:00 2001
From: xiedeyantu <czjourney@163.com>
Date: Wed, 8 Apr 2026 22:05:41 +0800
Subject: [PATCH 1/4] fix: raise AmbiguousReference error for duplicate column
 names in subquery

---
 datafusion/common/src/column.rs              |  32 ++++-
 datafusion/common/src/dfschema.rs            |  94 ++++++++++++-
 datafusion/core/src/physical_planner.rs      |   2 +-
 datafusion/expr/src/logical_plan/builder.rs  |  25 +++-
 datafusion/expr/src/logical_plan/plan.rs     |  59 ++++++++-
 datafusion/sql/tests/sql_integration.rs      |  13 ++
 datafusion/sqllogictest/test_files/joins.slt | 132 +++++++++++++++++++
 7 files changed, 351 insertions(+), 6 deletions(-)
diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index c7f0b5a4f4881..9f1587bbf74b5 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -237,7 +237,37 @@ impl Column {
                 .collect::<Vec<_>>();
             match qualified_fields.len() {
                 0 => continue,
-                1 => return Ok(Column::from(qualified_fields[0])),
+                1 => {
+                    // Even a single structural match must be rejected when the
+                    // schema itself has flagged the name as ambiguous (e.g. a
+                    // derived-table subquery that contained two columns with
+                    // the same unqualified name).
+                    let is_ambiguous = schema_level
+                        .iter()
+                        .any(|s| s.ambiguous_names().contains(&self.name));
+                    if is_ambiguous {
+                        return _schema_err!(SchemaError::AmbiguousReference {
+                            field: Box::new(Column::new_unqualified(&self.name)),
+                        })
+                        .map_err(|err| {
+                            let mut diagnostic = Diagnostic::new_error(
+                                format!("column '{}' is ambiguous", &self.name),
+                                self.spans().first(),
+                            );
+                            let columns = schema_level
+                                .iter()
+                                .flat_map(|s| s.columns_with_unqualified_name(&self.name))
+                                .collect::<Vec<_>>();
+                            add_possible_columns_to_diag(
+                                &mut diagnostic,
+                                &Column::new_unqualified(&self.name),
+                                &columns,
+                            );
+                            err.with_diagnostic(diagnostic)
+                        });
+                    }
+                    return Ok(Column::from(qualified_fields[0]));
+                }
                 _ => {
                     // More than 1 fields in this schema have their names set to self.name.
                     //
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index de0aacf9e8bcd..b44d4d70b0323 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -19,7 +19,7 @@
 //! fields with optional relation names.
 
 use std::collections::{BTreeSet, HashMap, HashSet};
-use std::fmt::{Display, Formatter};
+use std::fmt::{self, Display, Formatter};
 use std::hash::Hash;
 use std::sync::Arc;
 
@@ -108,7 +108,7 @@ pub type DFSchemaRef = Arc<DFSchema>;
 /// let schema: &Schema = df_schema.as_arrow();
 /// assert_eq!(schema.fields().len(), 1);
 /// ```
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Clone, PartialEq, Eq)]
 pub struct DFSchema {
     /// Inner Arrow schema reference.
     inner: SchemaRef,
@@ -117,6 +117,26 @@ pub struct DFSchema {
     field_qualifiers: Vec<Option<TableReference>>,
     /// Stores functional dependencies in the schema.
     functional_dependencies: FunctionalDependencies,
+    /// Field names that are ambiguous in this schema because the underlying
+    /// source (e.g. a derived-table subquery) contained multiple columns with
+    /// the same unqualified name.  Any attempt to reference these names without
+    /// a qualifier should produce an [`SchemaError::AmbiguousReference`] error.
+    ambiguous_names: Option<Arc<HashSet<String>>>,
+}
+
+impl fmt::Debug for DFSchema {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Show the ambiguous-names set as `{}` when it is empty/absent so that
+        // existing Debug snapshots are not affected by the Option wrapper.
+        let empty = HashSet::new();
+        let ambiguous = self.ambiguous_names.as_deref().unwrap_or(&empty);
+        f.debug_struct("DFSchema")
+            .field("inner", &self.inner)
+            .field("field_qualifiers", &self.field_qualifiers)
+            .field("functional_dependencies", &self.functional_dependencies)
+            .field("ambiguous_names", ambiguous)
+            .finish()
+    }
 }
 
 impl DFSchema {
@@ -126,6 +146,7 @@ impl DFSchema {
             inner: Arc::new(Schema::new([])),
             field_qualifiers: vec![],
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         }
     }
 
@@ -157,6 +178,7 @@ impl DFSchema {
             inner: schema,
             field_qualifiers: qualifiers,
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         dfschema.check_names()?;
         Ok(dfschema)
@@ -173,6 +195,7 @@ impl DFSchema {
             inner: schema,
             field_qualifiers: vec![None; field_count],
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         dfschema.check_names()?;
         Ok(dfschema)
@@ -191,6 +214,7 @@ impl DFSchema {
             inner: schema.clone().into(),
             field_qualifiers: vec![Some(qualifier); schema.fields.len()],
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         schema.check_names()?;
         Ok(schema)
@@ -205,6 +229,7 @@ impl DFSchema {
             inner: Arc::clone(schema),
             field_qualifiers: qualifiers,
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         dfschema.check_names()?;
         Ok(dfschema)
@@ -226,6 +251,7 @@ impl DFSchema {
             inner: Arc::clone(&self.inner),
             field_qualifiers: qualifiers,
             functional_dependencies: self.functional_dependencies.clone(),
+            ambiguous_names: self.ambiguous_names.clone(),
         })
     }
 
@@ -275,6 +301,35 @@ impl DFSchema {
         }
     }
 
+    /// Marks the given field names as ambiguous, replacing any names recorded
+    /// earlier; an empty set clears the marker entirely.
+    ///
+    /// Ambiguous names correspond to fields that originated from multiple
+    /// source columns with the same unqualified name (e.g. both sides of a
+    /// JOIN having an `age` column).  Resolving such a name without a table
+    /// qualifier produces an [`SchemaError::AmbiguousReference`] error.
+    pub fn with_ambiguous_names(mut self, names: HashSet<String>) -> Self {
+        self.ambiguous_names = if names.is_empty() {
+            None
+        } else {
+            Some(Arc::new(names))
+        };
+        self
+    }
+
+    /// Returns the set of field names that are considered ambiguous in this
+    /// schema.  See [`Self::with_ambiguous_names`].
+    ///
+    /// Returns a reference to an empty set when no ambiguous names have been
+    /// recorded (the common case).
+    pub fn ambiguous_names(&self) -> &HashSet<String> {
+        static EMPTY: std::sync::OnceLock<HashSet<String>> =
+            std::sync::OnceLock::new();
+        self.ambiguous_names
+            .as_deref()
+            .unwrap_or_else(|| EMPTY.get_or_init(HashSet::new))
+    }
+
     /// Create a new schema that contains the fields from this schema followed by the fields
     /// from the supplied schema. An error will be returned if there are duplicate field names.
     pub fn join(&self, schema: &DFSchema) -> Result<Self> {
@@ -294,6 +349,7 @@ impl DFSchema {
             inner: Arc::new(new_schema_with_metadata),
             field_qualifiers: new_qualifiers,
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         new_self.check_names()?;
         Ok(new_self)
@@ -350,6 +406,22 @@ impl DFSchema {
         let finished_with_metadata = finished.with_metadata(metadata);
         self.inner = finished_with_metadata.into();
         self.field_qualifiers.extend(qualifiers);
+        // Propagate ambiguous names from the other schema so that names marked
+        // as ambiguous (e.g. by a subquery alias over a JOIN) are not silently
+        // dropped when schemas are merged for ORDER BY / HAVING resolution.
+        if let Some(other_names) = &other_schema.ambiguous_names {
+            match &mut self.ambiguous_names {
+                Some(self_names) => {
+                    // Build a new combined set (Arc prevents in-place mutation).
+                    let mut combined = (**self_names).clone();
+                    combined.extend(other_names.iter().cloned());
+                    self.ambiguous_names = Some(Arc::new(combined));
+                }
+                None => {
+                    self.ambiguous_names = Some(Arc::clone(other_names));
+                }
+            }
+        }
     }
 
     /// Get a list of fields for this schema
@@ -506,6 +578,18 @@ impl DFSchema {
         &self,
         name: &str,
     ) -> Result<(Option<&TableReference>, &FieldRef)> {
+        // If this field name was marked as ambiguous at schema creation time
+        // (e.g. because a derived-table subquery produced duplicate column
+        // names), refuse to resolve it without an explicit qualifier.
+        if self
+            .ambiguous_names
+            .as_ref()
+            .is_some_and(|s| s.contains(name))
+        {
+            return _schema_err!(SchemaError::AmbiguousReference {
+                field: Box::new(Column::new_unqualified(name.to_string()))
+            });
+        }
         let matches = self.qualified_fields_with_unqualified_name(name);
         match matches.len() {
             0 => Err(unqualified_field_not_found(name, self)),
@@ -845,6 +929,7 @@ impl DFSchema {
             field_qualifiers: vec![None; self.inner.fields.len()],
             inner: self.inner,
             functional_dependencies: self.functional_dependencies,
+            ambiguous_names: self.ambiguous_names,
         }
     }
 
@@ -855,6 +940,7 @@ impl DFSchema {
             field_qualifiers: vec![Some(qualifier); self.inner.fields.len()],
             inner: self.inner,
             functional_dependencies: self.functional_dependencies,
+            ambiguous_names: self.ambiguous_names,
         }
     }
 
@@ -1126,6 +1212,7 @@ impl TryFrom<SchemaRef> for DFSchema {
             inner: schema,
             field_qualifiers: vec![None; field_count],
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         // Without checking names, because schema here may have duplicate field names.
         // For example, Partial AggregateMode will generate duplicate field names from
@@ -1187,6 +1274,7 @@ impl ToDFSchema for Vec<Field> {
             inner: schema.into(),
             field_qualifiers: vec![None; field_count],
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         Ok(dfschema)
     }
@@ -1578,6 +1666,7 @@ mod tests {
             inner: Arc::clone(&arrow_schema_ref),
             field_qualifiers: vec![None; arrow_schema_ref.fields.len()],
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
         let df_schema_ref = Arc::new(df_schema.clone());
 
@@ -1624,6 +1713,7 @@ mod tests {
             inner: Arc::clone(&schema),
             field_qualifiers: vec![None; schema.fields.len()],
             functional_dependencies: FunctionalDependencies::empty(),
+            ambiguous_names: None,
         };
 
         assert_eq!(df_schema.inner.metadata(), schema.metadata())
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
index bf84fcc53e957..540c8e941015a 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -3570,7 +3570,7 @@ mod tests {
             .expect_err("planning error")
             .strip_backtrace();
 
-        insta::assert_snapshot!(e, @r#"Error during planning: Extension planner for NoOp created an ExecutionPlan with mismatched schema. LogicalPlan schema: DFSchema { inner: Schema { fields: [Field { name: "a", data_type: Int32 }], metadata: {} }, field_qualifiers: [None], functional_dependencies: FunctionalDependencies { deps: [] } }, ExecutionPlan schema: Schema { fields: [Field { name: "b", data_type: Int32 }], metadata: {} }"#);
+        insta::assert_snapshot!(e, @r#"Error during planning: Extension planner for NoOp created an ExecutionPlan with mismatched schema. LogicalPlan schema: DFSchema { inner: Schema { fields: [Field { name: "a", data_type: Int32 }], metadata: {} }, field_qualifiers: [None], functional_dependencies: FunctionalDependencies { deps: [] }, ambiguous_names: {} }, ExecutionPlan schema: Schema { fields: [Field { name: "b", data_type: Int32 }], metadata: {} }"#);
     }
 
     #[tokio::test]
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 5381313e2ee9b..39f84ccb38766 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1745,7 +1745,30 @@ pub fn build_join_schema(
         .collect();
 
     let dfschema = DFSchema::new_with_metadata(qualified_fields, metadata)?;
-    dfschema.with_functional_dependencies(func_dependencies)
+    let dfschema = dfschema.with_functional_dependencies(func_dependencies)?;
+
+    // Propagate ambiguous names from both input schemas.  A name that was
+    // already ambiguous on either side of the join (e.g. because the left
+    // input is itself a subquery that wrapped a JOIN) remains ambiguous in
+    // the output.  We only propagate names that actually appear as field
+    // names in the output schema so we don't accumulate stale entries.
+    let output_field_names: HashSet<&str> = dfschema
+        .fields()
+        .iter()
+        .map(|f| f.name().as_str())
+        .collect();
+    let inherited_ambiguous: HashSet<String> = left
+        .ambiguous_names()
+        .iter()
+        .chain(right.ambiguous_names())
+        .filter(|n| output_field_names.contains(n.as_str()))
+        .cloned()
+        .collect();
+    if inherited_ambiguous.is_empty() {
+        Ok(dfschema)
+    } else {
+        Ok(dfschema.with_ambiguous_names(inherited_ambiguous))
+    }
 }
 
 /// (Re)qualify the sides of a join if needed, i.e. if the columns from one side would otherwise
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index 07e0eb1a77aa9..bfb513e661cef 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -2375,6 +2375,29 @@ pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result<Arc<DFSc
                 exprs, input,
             )?)?;
 
+    // Propagate ambiguous names from the input for any column passed through
+    // unchanged.  This prevents a `SELECT * FROM (...) AS alias` wrapper from
+    // silently dropping the marker recorded by an inner alias or carried by a JOIN.
+    let input_ambiguous = input.schema().ambiguous_names();
+    if !input_ambiguous.is_empty() {
+        // A column is a pass-through when it is `Expr::Column(c)` and `c.name`
+        // appears in the input's ambiguous set.
+        let inherited: HashSet<String> = exprs
+            .iter()
+            .filter_map(|e| {
+                if let Expr::Column(col) = e
+                    && input_ambiguous.contains(&col.name)
+                {
+                    return Some(col.name.clone());
+                }
+                None
+            })
+            .collect();
+        if !inherited.is_empty() {
+            return Ok(Arc::new(schema.with_ambiguous_names(inherited)));
+        }
+    }
+
     Ok(Arc::new(schema))
 }
 
@@ -2406,6 +2429,39 @@ impl SubqueryAlias {
         let aliases = unique_field_aliases(plan.schema().fields());
         let is_projection_needed = aliases.iter().any(Option::is_some);
 
+        // Collect unqualified field names that are ambiguous in this alias's
+        // output schema.  `unique_field_aliases` renames duplicates (e.g. to
+        // "id:1") to keep Arrow happy, but outer queries must still be
+        // prevented from referencing those names without qualification.
+        // We also inherit names already marked ambiguous by the input schema
+        // so nested `SELECT * FROM (...) AS sN` wrappers don't lose the marker.
+        let ambiguous_names: HashSet<String> = {
+            let mut name_counts: HashMap<&str, usize> = HashMap::new();
+            for field in plan.schema().fields() {
+                *name_counts.entry(field.name().as_str()).or_insert(0) += 1;
+            }
+            let mut names: HashSet<String> = name_counts
+                .into_iter()
+                .filter(|&(_, count)| count >= 2)
+                .map(|(name, _)| name.to_string())
+                .collect();
+
+            // Inherit names still visible in the output (the first occurrence
+            // of a renamed duplicate like "id:1" still keeps the name "id").
+            let output_field_names: HashSet<&str> = plan
+                .schema()
+                .fields()
+                .iter()
+                .map(|f| f.name().as_str())
+                .collect();
+            for inherited in plan.schema().ambiguous_names() {
+                if output_field_names.contains(inherited.as_str()) {
+                    names.insert(inherited.clone());
+                }
+            }
+            names
+        };
+
         // Insert a projection node, if needed, to make sure aliases are applied.
         let plan = if is_projection_needed {
             let projection_expressions = aliases
@@ -2438,7 +2494,8 @@ impl SubqueryAlias {
 
         let schema = DFSchemaRef::new(
             DFSchema::try_from_qualified_schema(alias.clone(), schema)?
-                .with_functional_dependencies(func_dependencies)?,
+                .with_functional_dependencies(func_dependencies)?
+                .with_ambiguous_names(ambiguous_names),
         );
         Ok(SubqueryAlias {
             input: plan,
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index fd606af3a6af0..287e1c472322f 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -3938,6 +3938,19 @@ fn order_by_ambiguous_name() {
     );
 }
 
+#[test]
+fn order_by_ambiguous_name_via_subquery() {
+    // `age` is not in the SELECT list; ORDER BY falls back to the FROM schema,
+    // which is a subquery over a JOIN — `age` must still be flagged ambiguous.
+    let sql = "SELECT id FROM (SELECT * FROM person a JOIN person b USING (id)) sub ORDER BY age";
+    let err = logical_plan(sql).unwrap_err().strip_backtrace();
+
+    assert_snapshot!(
+        err,
+        @"Schema error: Ambiguous reference to unqualified field age"
+    );
+}
+
 #[test]
 fn group_by_ambiguous_name() {
     let sql = "select max(id) from person a join person b using (id) group by age";
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index 1ed3fc89b2642..42bd77eda0153 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -5508,3 +5508,135 @@ DROP TABLE t1;
 
 statement ok
 DROP TABLE t2;
+
+####
+# Ambiguous unqualified column references through a subquery alias wrapping JOINs.
+# When two or more JOIN inputs share a column name the outer query must use
+# the qualified form (alias.column); bare unqualified references must be rejected.
+####
+
+statement ok
+CREATE TABLE t_left(id INT, age INT, name VARCHAR) AS VALUES
+  (1, 10, 'alice'),
+  (2, 20, 'bob'),
+  (3, 30, 'carol');
+
+statement ok
+CREATE TABLE t_right(id INT, age INT, score INT) AS VALUES
+  (1, 10, 100),
+  (2, 20, 200),
+  (4, 40, 400);
+
+statement ok
+CREATE TABLE t_extra(id INT, dept VARCHAR) AS VALUES
+  (1, 'eng'),
+  (2, 'sales'),
+  (5, 'hr');
+
+# 2-way join: qualified references to columns shared by both sides work fine
+query III rowsort
+SELECT sub.id, sub.age, sub.score
+FROM (SELECT t_left.id, t_left.age, t_right.score
+      FROM t_left JOIN t_right ON t_left.id = t_right.id) AS sub;
+----
+1 10 100
+2 20 200
+
+# 2-way join: unqualified "id" is ambiguous (both sides expose it)
+query error DataFusion error: Schema error: Ambiguous reference to unqualified field id
+SELECT sub.id FROM (SELECT * FROM t_left JOIN t_right ON t_left.id = t_right.id) AS sub WHERE id = 1;
+
+# 2-way join: unqualified "age" is ambiguous (both sides expose it)
+query error DataFusion error: Schema error: Ambiguous reference to unqualified field age
+SELECT sub.age FROM (SELECT * FROM t_left JOIN t_right ON t_left.id = t_right.id) AS sub WHERE age > 5;
+
+# 3-way join: qualified references still work when all three tables share "id"
+query IIIT rowsort
+SELECT sub.id, sub.age, sub.score, sub.dept
+FROM (SELECT t_left.id, t_left.age, t_right.score, t_extra.dept
+      FROM t_left
+      JOIN t_right ON t_left.id = t_right.id
+      JOIN t_extra ON t_left.id = t_extra.id) AS sub;
+----
+1 10 100 eng
+2 20 200 sales
+
+# 3-way join: unqualified "id" is ambiguous (present in all three tables)
+query error DataFusion error: Schema error: Ambiguous reference to unqualified field id
+SELECT sub.id FROM (SELECT * FROM t_left
+                    JOIN t_right ON t_left.id = t_right.id
+                    JOIN t_extra ON t_left.id = t_extra.id) AS sub
+WHERE id = 1;
+
+# 3-way join: unqualified "age" is ambiguous (shared by t_left and t_right)
+query error DataFusion error: Schema error: Ambiguous reference to unqualified field age
+SELECT sub.age FROM (SELECT * FROM t_left
+                     JOIN t_right ON t_left.id = t_right.id
+                     JOIN t_extra ON t_left.id = t_extra.id) AS sub
+WHERE age > 5;
+
+# 3-way join: unambiguous columns (unique to one table) need no qualifier
+query IT rowsort
+SELECT sub.score, sub.dept
+FROM (SELECT t_left.id, t_left.age, t_right.score, t_extra.dept
+      FROM t_left
+      JOIN t_right ON t_left.id = t_right.id
+      JOIN t_extra ON t_left.id = t_extra.id) AS sub;
+----
+100 eng
+200 sales
+
+# Nested derived table: double SELECT * wrapper must preserve ambiguity.
+# SELECT age FROM ( SELECT * FROM ( SELECT * FROM t_left JOIN t_right ON t_left.id = t_right.id ) AS s1 ) AS s2
+# "age" is ambiguous in s1 (from both t_left and t_right) and must stay
+# ambiguous when s1 is wrapped in another SELECT * … AS s2.
+query error DataFusion error: Schema error: Ambiguous reference to unqualified field age
+SELECT age FROM (
+  SELECT * FROM (
+    SELECT * FROM t_left JOIN t_right ON t_left.id = t_right.id
+  ) AS s1
+) AS s2;
+
+# Nested derived table: same for "id"
+query error DataFusion error: Schema error: Ambiguous reference to unqualified field id
+SELECT id FROM (
+  SELECT * FROM (
+    SELECT * FROM t_left JOIN t_right ON t_left.id = t_right.id
+  ) AS s1
+) AS s2;
+
+# Join over subquery + table: ambiguous names from the subquery side must
+# propagate into the outer join schema so that bare "age" is still rejected.
+# Set up a seed table with a single column so only the subquery side has "age".
+statement ok
+CREATE TABLE seed(val INT) AS VALUES (1), (2);
+
+query error DataFusion error: Schema error: Ambiguous reference to unqualified field age
+SELECT age FROM (SELECT * FROM t_left JOIN t_right ON t_left.id = t_right.id) sub
+JOIN seed ON true;
+
+# Qualified access through the subquery alias is still fine even after joining
+# with another table.
+query II rowsort
+SELECT sub.id, sub.score FROM (
+  SELECT t_left.id, t_right.score
+  FROM t_left JOIN t_right ON t_left.id = t_right.id
+) sub
+JOIN seed ON true;
+----
+1 100
+1 100
+2 200
+2 200
+
+statement ok
+DROP TABLE seed;
+
+statement ok
+DROP TABLE t_left;
+
+statement ok
+DROP TABLE t_right;
+
+statement ok
+DROP TABLE t_extra;

From e3e3c55cb03ce205bedd9438e0ef982eb6876cff Mon Sep 17 00:00:00 2001
From: xiedeyantu <czjourney@163.com>
Date: Wed, 8 Apr 2026 22:10:26 +0800
Subject: [PATCH 2/4] fix test (use imported `fmt` paths in dfschema.rs)

---
 datafusion/common/src/dfschema.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index b44d4d70b0323..13e0be8c24dff 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -125,7 +125,7 @@ pub struct DFSchema {
 }
 
 impl fmt::Debug for DFSchema {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         // Show the ambiguous-names set as `{}` when it is empty/absent so that
         // existing Debug snapshots are not affected by the Option wrapper.
         let empty = HashSet::new();
@@ -1281,7 +1281,7 @@ impl ToDFSchema for Vec<Field> {
 }
 
 impl Display for DFSchema {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
         write!(
             f,
             "fields:[{}], metadata:{:?}",
@@ -1299,7 +1299,7 @@ impl Display for DFSchema {
 ///
 /// Note that this trait is implemented for &[DFSchema] which is
 /// widely used in the DataFusion codebase.
-pub trait ExprSchema: std::fmt::Debug {
+pub trait ExprSchema: fmt::Debug {
     /// Is this column reference nullable?
     fn nullable(&self, col: &Column) -> Result<bool> {
         Ok(self.field_from_column(col)?.is_nullable())
@@ -1326,7 +1326,7 @@ pub trait ExprSchema: std::fmt::Debug {
 }
 
 // Implement `ExprSchema` for `Arc<DFSchema>`
-impl<P: AsRef<DFSchema> + std::fmt::Debug> ExprSchema for P {
+impl<P: AsRef<DFSchema> + fmt::Debug> ExprSchema for P {
     fn nullable(&self, col: &Column) -> Result<bool> {
         self.as_ref().nullable(col)
     }

From 4e8a1a5193dfd884bcbc8016546310a45f1747e9 Mon Sep 17 00:00:00 2001
From: xiedeyantu <czjourney@163.com>
Date: Wed, 8 Apr 2026 22:28:50 +0800
Subject: [PATCH 3/4] fix fmt

---
 datafusion/common/src/dfschema.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index 13e0be8c24dff..ca43a9e49f53c 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -323,8 +323,7 @@ impl DFSchema {
     /// Returns a reference to an empty set when no ambiguous names have been
     /// recorded (the common case).
     pub fn ambiguous_names(&self) -> &HashSet<String> {
-        static EMPTY: std::sync::OnceLock<HashSet<String>> =
-            std::sync::OnceLock::new();
+        static EMPTY: std::sync::OnceLock<HashSet<String>> = std::sync::OnceLock::new();
         self.ambiguous_names
             .as_deref()
             .unwrap_or_else(|| EMPTY.get_or_init(HashSet::new))

From 0c8b96d7249c26119873f704a78fbb7043bbee02 Mon Sep 17 00:00:00 2001
From: xiedeyantu <czjourney@163.com>
Date: Thu, 9 Apr 2026 07:09:22 +0800
Subject: [PATCH 4/4] fix panic in sql_planner bench (expose EventDate as DATE)

---
 datafusion/core/benches/sql_planner.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs
index 59502da987904..fcc8da30fedd9 100644
--- a/datafusion/core/benches/sql_planner.rs
+++ b/datafusion/core/benches/sql_planner.rs
@@ -130,7 +130,8 @@ fn register_clickbench_hits_table(rt: &Runtime) -> SessionContext {
             format!("{BENCHMARKS_PATH_2}{CLICKBENCH_DATA_PATH}")
         };
 
-    let sql = format!("CREATE EXTERNAL TABLE hits STORED AS PARQUET LOCATION '{path}'");
+    let sql =
+        format!("CREATE EXTERNAL TABLE hits_raw STORED AS PARQUET LOCATION '{path}'");
 
     // ClickBench partitioned dataset was written by an ancient version of pyarrow that
     // that wrote strings with the wrong logical type. To read it correctly, we must
@@ -139,6 +140,17 @@ fn register_clickbench_hits_table(rt: &Runtime) -> SessionContext {
         .unwrap();
     rt.block_on(ctx.sql(&sql)).unwrap();
 
+    // ClickBench stores EventDate as UInt16 (days since 1970-01-01). Create a view
+    // that exposes it as SQL DATE so that queries comparing it with date literals
+    // (e.g. "EventDate >= '2013-07-01'") work correctly during planning.
+    rt.block_on(ctx.sql(
+        "CREATE VIEW hits AS \
+         SELECT * EXCEPT (\"EventDate\"), \
+                CAST(CAST(\"EventDate\" AS INTEGER) AS DATE) AS \"EventDate\" \
+         FROM hits_raw",
+    ))
+    .unwrap();
+
     let count =
         rt.block_on(async { ctx.table("hits").await.unwrap().count().await.unwrap() });
     assert!(count > 0);