1919//! fields with optional relation names.
2020
2121use std:: collections:: { BTreeSet , HashMap , HashSet } ;
22- use std:: fmt:: { Display , Formatter } ;
22+ use std:: fmt:: { self , Display , Formatter } ;
2323use std:: hash:: Hash ;
2424use std:: sync:: Arc ;
2525
@@ -108,7 +108,7 @@ pub type DFSchemaRef = Arc<DFSchema>;
108108/// let schema: &Schema = df_schema.as_arrow();
109109/// assert_eq!(schema.fields().len(), 1);
110110/// ```
111- #[ derive( Debug , Clone , PartialEq , Eq ) ]
111+ #[ derive( Clone , PartialEq , Eq ) ]
112112pub struct DFSchema {
113113 /// Inner Arrow schema reference.
114114 inner : SchemaRef ,
@@ -117,6 +117,26 @@ pub struct DFSchema {
117117 field_qualifiers : Vec < Option < TableReference > > ,
118118 /// Stores functional dependencies in the schema.
119119 functional_dependencies : FunctionalDependencies ,
120+ /// Field names that are ambiguous in this schema because the underlying
121+ /// source (e.g. a derived-table subquery) contained multiple columns with
122+ /// the same unqualified name. Any attempt to reference these names without
123+ /// a qualifier should produce an [`SchemaError::AmbiguousReference`] error.
124+ ambiguous_names : Option < Arc < HashSet < String > > > ,
125+ }
126+
127+ impl fmt:: Debug for DFSchema {
128+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
129+ // Show the ambiguous-names set as `{}` when it is empty/absent so that
130+ // existing Debug snapshots are not affected by the Option wrapper.
131+ let empty = HashSet :: new ( ) ;
132+ let ambiguous = self . ambiguous_names . as_deref ( ) . unwrap_or ( & empty) ;
133+ f. debug_struct ( "DFSchema" )
134+ . field ( "inner" , & self . inner )
135+ . field ( "field_qualifiers" , & self . field_qualifiers )
136+ . field ( "functional_dependencies" , & self . functional_dependencies )
137+ . field ( "ambiguous_names" , ambiguous)
138+ . finish ( )
139+ }
120140}
121141
122142impl DFSchema {
@@ -126,6 +146,7 @@ impl DFSchema {
126146 inner : Arc :: new ( Schema :: new ( [ ] ) ) ,
127147 field_qualifiers : vec ! [ ] ,
128148 functional_dependencies : FunctionalDependencies :: empty ( ) ,
149+ ambiguous_names : None ,
129150 }
130151 }
131152
@@ -157,6 +178,7 @@ impl DFSchema {
157178 inner : schema,
158179 field_qualifiers : qualifiers,
159180 functional_dependencies : FunctionalDependencies :: empty ( ) ,
181+ ambiguous_names : None ,
160182 } ;
161183 dfschema. check_names ( ) ?;
162184 Ok ( dfschema)
@@ -173,6 +195,7 @@ impl DFSchema {
173195 inner : schema,
174196 field_qualifiers : vec ! [ None ; field_count] ,
175197 functional_dependencies : FunctionalDependencies :: empty ( ) ,
198+ ambiguous_names : None ,
176199 } ;
177200 dfschema. check_names ( ) ?;
178201 Ok ( dfschema)
@@ -191,6 +214,7 @@ impl DFSchema {
191214 inner : schema. clone ( ) . into ( ) ,
192215 field_qualifiers : vec ! [ Some ( qualifier) ; schema. fields. len( ) ] ,
193216 functional_dependencies : FunctionalDependencies :: empty ( ) ,
217+ ambiguous_names : None ,
194218 } ;
195219 schema. check_names ( ) ?;
196220 Ok ( schema)
@@ -205,6 +229,7 @@ impl DFSchema {
205229 inner : Arc :: clone ( schema) ,
206230 field_qualifiers : qualifiers,
207231 functional_dependencies : FunctionalDependencies :: empty ( ) ,
232+ ambiguous_names : None ,
208233 } ;
209234 dfschema. check_names ( ) ?;
210235 Ok ( dfschema)
@@ -226,6 +251,7 @@ impl DFSchema {
226251 inner : Arc :: clone ( & self . inner ) ,
227252 field_qualifiers : qualifiers,
228253 functional_dependencies : self . functional_dependencies . clone ( ) ,
254+ ambiguous_names : self . ambiguous_names . clone ( ) ,
229255 } )
230256 }
231257
@@ -275,6 +301,35 @@ impl DFSchema {
275301 }
276302 }
277303
304+ /// Marks the given field names as ambiguous.
305+ ///
306+ /// Ambiguous names correspond to fields that originated from multiple
307+ /// source columns with the same unqualified name (e.g. both sides of a
308+ /// JOIN having an `age` column). Any attempt to resolve such a name
309+ /// without a table qualifier will produce an
310+ /// [`SchemaError::AmbiguousReference`] error.
311+ pub fn with_ambiguous_names ( mut self , names : HashSet < String > ) -> Self {
312+ self . ambiguous_names = if names. is_empty ( ) {
313+ None
314+ } else {
315+ Some ( Arc :: new ( names) )
316+ } ;
317+ self
318+ }
319+
320+ /// Returns the set of field names that are considered ambiguous in this
321+ /// schema. See [`Self::with_ambiguous_names`].
322+ ///
323+ /// Returns a reference to an empty set when no ambiguous names have been
324+ /// recorded (the common case).
325+ pub fn ambiguous_names ( & self ) -> & HashSet < String > {
326+ static EMPTY : std:: sync:: OnceLock < HashSet < String > > =
327+ std:: sync:: OnceLock :: new ( ) ;
328+ self . ambiguous_names
329+ . as_deref ( )
330+ . unwrap_or_else ( || EMPTY . get_or_init ( HashSet :: new) )
331+ }
332+
278333 /// Create a new schema that contains the fields from this schema followed by the fields
279334 /// from the supplied schema. An error will be returned if there are duplicate field names.
280335 pub fn join ( & self , schema : & DFSchema ) -> Result < Self > {
@@ -294,6 +349,7 @@ impl DFSchema {
294349 inner : Arc :: new ( new_schema_with_metadata) ,
295350 field_qualifiers : new_qualifiers,
296351 functional_dependencies : FunctionalDependencies :: empty ( ) ,
352+ ambiguous_names : None ,
297353 } ;
298354 new_self. check_names ( ) ?;
299355 Ok ( new_self)
@@ -350,6 +406,22 @@ impl DFSchema {
350406 let finished_with_metadata = finished. with_metadata ( metadata) ;
351407 self . inner = finished_with_metadata. into ( ) ;
352408 self . field_qualifiers . extend ( qualifiers) ;
409+ // Propagate ambiguous names from the other schema so that names marked
410+ // as ambiguous (e.g. by a JOIN) are not silently dropped when schemas
411+ // are merged for ORDER BY / HAVING resolution.
412+ if let Some ( other_names) = & other_schema. ambiguous_names {
413+ match & mut self . ambiguous_names {
414+ Some ( self_names) => {
415+ // Build a new combined set (Arc prevents in-place mutation).
416+ let mut combined = ( * * self_names) . clone ( ) ;
417+ combined. extend ( other_names. iter ( ) . cloned ( ) ) ;
418+ self . ambiguous_names = Some ( Arc :: new ( combined) ) ;
419+ }
420+ None => {
421+ self . ambiguous_names = Some ( Arc :: clone ( other_names) ) ;
422+ }
423+ }
424+ }
353425 }
354426
355427 /// Get a list of fields for this schema
@@ -506,6 +578,18 @@ impl DFSchema {
506578 & self ,
507579 name : & str ,
508580 ) -> Result < ( Option < & TableReference > , & FieldRef ) > {
581+ // If this field name was marked as ambiguous at schema creation time
582+ // (e.g. because a derived-table subquery produced duplicate column
583+ // names), refuse to resolve it without an explicit qualifier.
584+ if self
585+ . ambiguous_names
586+ . as_ref ( )
587+ . is_some_and ( |s| s. contains ( name) )
588+ {
589+ return _schema_err ! ( SchemaError :: AmbiguousReference {
590+ field: Box :: new( Column :: new_unqualified( name. to_string( ) ) )
591+ } ) ;
592+ }
509593 let matches = self . qualified_fields_with_unqualified_name ( name) ;
510594 match matches. len ( ) {
511595 0 => Err ( unqualified_field_not_found ( name, self ) ) ,
@@ -845,6 +929,7 @@ impl DFSchema {
845929 field_qualifiers : vec ! [ None ; self . inner. fields. len( ) ] ,
846930 inner : self . inner ,
847931 functional_dependencies : self . functional_dependencies ,
932+ ambiguous_names : self . ambiguous_names ,
848933 }
849934 }
850935
@@ -855,6 +940,7 @@ impl DFSchema {
855940 field_qualifiers : vec ! [ Some ( qualifier) ; self . inner. fields. len( ) ] ,
856941 inner : self . inner ,
857942 functional_dependencies : self . functional_dependencies ,
943+ ambiguous_names : self . ambiguous_names ,
858944 }
859945 }
860946
@@ -1126,6 +1212,7 @@ impl TryFrom<SchemaRef> for DFSchema {
11261212 inner : schema,
11271213 field_qualifiers : vec ! [ None ; field_count] ,
11281214 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1215+ ambiguous_names : None ,
11291216 } ;
11301217 // Without checking names, because schema here may have duplicate field names.
11311218 // For example, Partial AggregateMode will generate duplicate field names from
@@ -1187,6 +1274,7 @@ impl ToDFSchema for Vec<Field> {
11871274 inner : schema. into ( ) ,
11881275 field_qualifiers : vec ! [ None ; field_count] ,
11891276 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1277+ ambiguous_names : None ,
11901278 } ;
11911279 Ok ( dfschema)
11921280 }
@@ -1578,6 +1666,7 @@ mod tests {
15781666 inner : Arc :: clone ( & arrow_schema_ref) ,
15791667 field_qualifiers : vec ! [ None ; arrow_schema_ref. fields. len( ) ] ,
15801668 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1669+ ambiguous_names : None ,
15811670 } ;
15821671 let df_schema_ref = Arc :: new ( df_schema. clone ( ) ) ;
15831672
@@ -1624,6 +1713,7 @@ mod tests {
16241713 inner : Arc :: clone ( & schema) ,
16251714 field_qualifiers : vec ! [ None ; schema. fields. len( ) ] ,
16261715 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1716+ ambiguous_names : None ,
16271717 } ;
16281718
16291719 assert_eq ! ( df_schema. inner. metadata( ) , schema. metadata( ) )
0 commit comments