@@ -840,10 +840,37 @@ pub fn try_type_union_resolution_with_struct(
840840 Ok ( final_struct_types)
841841}
842842
843- /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a
844- /// comparison operation
843+ /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of
844+ /// type unification — that is, contexts where two values must be brought to
845+ /// a common type but are not being compared. Examples include UNION, CASE,
846+ /// IN lists, NVL2, and struct field coercion.
847+ ///
848+ /// When unifying numeric values and strings, both values will be coerced to
849+ /// strings. For example, in `SELECT 1 UNION SELECT '2'`, both sides are
850+ /// coerced to `Utf8` since string is the safe widening type.
845851///
846- /// Example comparison operations are `lhs = rhs` and `lhs > rhs`
852+ /// For comparison operations (e.g., `=`, `<`, `>`), use [`comparison_coercion`]
853+ /// instead, which prefers numeric types over strings.
854+ pub fn type_union_coercion ( lhs_type : & DataType , rhs_type : & DataType ) -> Option < DataType > {
855+ if lhs_type. equals_datatype ( rhs_type) {
856+ return Some ( lhs_type. clone ( ) ) ;
857+ }
858+ binary_numeric_coercion ( lhs_type, rhs_type)
859+ . or_else ( || dictionary_type_union_coercion ( lhs_type, rhs_type, true ) )
860+ . or_else ( || ree_type_union_coercion ( lhs_type, rhs_type, true ) )
861+ . or_else ( || temporal_coercion_nonstrict_timezone ( lhs_type, rhs_type) )
862+ . or_else ( || string_coercion ( lhs_type, rhs_type) )
863+ . or_else ( || list_coercion ( lhs_type, rhs_type) )
864+ . or_else ( || null_coercion ( lhs_type, rhs_type) )
865+ . or_else ( || string_numeric_union_coercion ( lhs_type, rhs_type) )
866+ . or_else ( || string_temporal_coercion ( lhs_type, rhs_type) )
867+ . or_else ( || binary_coercion ( lhs_type, rhs_type) )
868+ . or_else ( || struct_coercion ( lhs_type, rhs_type) )
869+ . or_else ( || map_coercion ( lhs_type, rhs_type) )
870+ }
871+
872+ /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a
873+ /// comparison operation (e.g., `=`, `!=`, `<`, `>`, `<=`, `>=`).
847874///
848875/// Binary comparison kernels require the two arguments to be the (exact) same
849876/// data type. However, users can write queries where the two arguments are
@@ -859,11 +886,15 @@ pub fn try_type_union_resolution_with_struct(
859886///
860887/// # Numeric / String comparisons
861888///
862- /// When comparing numeric values and strings, both values will be coerced to
863- /// strings. For example when comparing `'2' > 1`, the arguments will be
864- /// coerced to `Utf8` for comparison
889+ /// When comparing numeric values and strings, the string value will be coerced
890+ /// to the numeric type. For example when comparing `'2' > 1` where `1` is
891+ /// `Int32`, `'2'` will be coerced to `Int32` for comparison.
892+ ///
893+ /// For type unification contexts (see [`type_union_coercion`]), use
894+ /// [`type_union_coercion`] instead, which prefers strings as the safe widening
895+ /// type.
865896pub fn comparison_coercion ( lhs_type : & DataType , rhs_type : & DataType ) -> Option < DataType > {
866- if lhs_type. equals_datatype ( rhs_type) {
897+ if lhs_type == rhs_type {
867898 // same type => equality is possible
868899 return Some ( lhs_type. clone ( ) ) ;
869900 }
@@ -881,33 +912,29 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
881912 . or_else ( || map_coercion ( lhs_type, rhs_type) )
882913}
883914
884- /// Similar to [`comparison_coercion`] but prefers numeric if compares with
885- /// numeric and string
886- ///
887- /// # Numeric comparisons
888- ///
889- /// When comparing numeric values and strings, the values will be coerced to the
890- /// numeric type. For example, `'2' > 1` if `1` is an `Int32`, the arguments
891- /// will be coerced to `Int32`.
892- pub fn comparison_coercion_numeric (
893- lhs_type : & DataType ,
894- rhs_type : & DataType ,
895- ) -> Option < DataType > {
896- if lhs_type == rhs_type {
897- // same type => equality is possible
898- return Some ( lhs_type. clone ( ) ) ;
915+ /// Coerce `lhs_type` and `rhs_type` to a common type where one is numeric and
916+ /// one is string, preferring the numeric type. Used for comparison contexts
917+ /// where numeric comparison semantics are desired.
918+ fn string_numeric_coercion ( lhs_type : & DataType , rhs_type : & DataType ) -> Option < DataType > {
919+ let lhs_logical_type = NativeType :: from ( lhs_type) ;
920+ let rhs_logical_type = NativeType :: from ( rhs_type) ;
921+ if lhs_logical_type. is_numeric ( ) && rhs_logical_type == NativeType :: String {
922+ return Some ( lhs_type. to_owned ( ) ) ;
899923 }
900- binary_numeric_coercion ( lhs_type, rhs_type)
901- . or_else ( || dictionary_comparison_coercion_numeric ( lhs_type, rhs_type, true ) )
902- . or_else ( || ree_comparison_coercion_numeric ( lhs_type, rhs_type, true ) )
903- . or_else ( || string_coercion ( lhs_type, rhs_type) )
904- . or_else ( || null_coercion ( lhs_type, rhs_type) )
905- . or_else ( || string_numeric_coercion_as_numeric ( lhs_type, rhs_type) )
924+ if rhs_logical_type. is_numeric ( ) && lhs_logical_type == NativeType :: String {
925+ return Some ( rhs_type. to_owned ( ) ) ;
926+ }
927+
928+ None
906929}
907930
908- /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
909- /// where one is numeric and one is `Utf8`/`LargeUtf8`.
910- fn string_numeric_coercion ( lhs_type : & DataType , rhs_type : & DataType ) -> Option < DataType > {
931+ /// Coerce `lhs_type` and `rhs_type` to a common type where one is numeric and
932+ /// one is string, preferring the string type. Used for type unification contexts
933+ /// (see [`type_union_coercion`]) where string is the safe widening type.
934+ fn string_numeric_union_coercion (
935+ lhs_type : & DataType ,
936+ rhs_type : & DataType ,
937+ ) -> Option < DataType > {
911938 use arrow:: datatypes:: DataType :: * ;
912939 match ( lhs_type, rhs_type) {
913940 ( Utf8 , _) if rhs_type. is_numeric ( ) => Some ( Utf8 ) ,
@@ -920,24 +947,6 @@ fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
920947 }
921948}
922949
923- /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
924- /// where one is numeric and one is `Utf8`/`LargeUtf8`.
925- fn string_numeric_coercion_as_numeric (
926- lhs_type : & DataType ,
927- rhs_type : & DataType ,
928- ) -> Option < DataType > {
929- let lhs_logical_type = NativeType :: from ( lhs_type) ;
930- let rhs_logical_type = NativeType :: from ( rhs_type) ;
931- if lhs_logical_type. is_numeric ( ) && rhs_logical_type == NativeType :: String {
932- return Some ( lhs_type. to_owned ( ) ) ;
933- }
934- if rhs_logical_type. is_numeric ( ) && lhs_logical_type == NativeType :: String {
935- return Some ( rhs_type. to_owned ( ) ) ;
936- }
937-
938- None
939- }
940-
941950/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
942951/// where one is temporal and one is `Utf8View`/`Utf8`/`LargeUtf8`.
943952///
@@ -1308,7 +1317,7 @@ fn coerce_struct_by_name(lhs_fields: &Fields, rhs_fields: &Fields) -> Option<Dat
13081317
13091318 for lhs in lhs_fields. iter ( ) {
13101319 let rhs = rhs_by_name. get ( lhs. name ( ) . as_str ( ) ) . unwrap ( ) ; // safe: caller ensured names match
1311- let coerced_type = comparison_coercion ( lhs. data_type ( ) , rhs. data_type ( ) ) ?;
1320+ let coerced_type = type_union_coercion ( lhs. data_type ( ) , rhs. data_type ( ) ) ?;
13121321 let is_nullable = lhs. is_nullable ( ) || rhs. is_nullable ( ) ;
13131322 coerced. push ( Arc :: new ( Field :: new (
13141323 lhs. name ( ) . clone ( ) ,
@@ -1332,7 +1341,7 @@ fn coerce_struct_by_position(
13321341 let coerced_types: Vec < DataType > = lhs_fields
13331342 . iter ( )
13341343 . zip ( rhs_fields. iter ( ) )
1335- . map ( |( l, r) | comparison_coercion ( l. data_type ( ) , r. data_type ( ) ) )
1344+ . map ( |( l, r) | type_union_coercion ( l. data_type ( ) , r. data_type ( ) ) )
13361345 . collect :: < Option < Vec < DataType > > > ( ) ?;
13371346
13381347 // Build final fields preserving left-side names and combined nullability.
@@ -1512,12 +1521,11 @@ fn dictionary_comparison_coercion_generic(
15121521 }
15131522}
15141523
1515- /// Coercion rules for Dictionaries: the type that both lhs and rhs
1516- /// can be casted to for the purpose of a computation.
1524+ /// Coercion rules for Dictionaries in type unification contexts (see [`type_union_coercion`]).
15171525///
15181526/// Not all operators support dictionaries, if `preserve_dictionaries` is true
15191527/// dictionaries will be preserved if possible
1520- fn dictionary_comparison_coercion (
1528+ fn dictionary_type_union_coercion (
15211529 lhs_type : & DataType ,
15221530 rhs_type : & DataType ,
15231531 preserve_dictionaries : bool ,
@@ -1526,17 +1534,14 @@ fn dictionary_comparison_coercion(
15261534 lhs_type,
15271535 rhs_type,
15281536 preserve_dictionaries,
1529- comparison_coercion ,
1537+ type_union_coercion ,
15301538 )
15311539}
15321540
1533- /// Coercion rules for Dictionaries with numeric preference: similar to
1534- /// [`dictionary_comparison_coercion`] but uses [`comparison_coercion_numeric`]
1535- /// which prefers numeric types over strings when both are present.
1541+ /// Coercion rules for Dictionaries in comparison contexts.
15361542///
1537- /// This is used by [`comparison_coercion_numeric`] to maintain consistent
1538- /// numeric-preferring semantics when dealing with dictionary types.
1539- fn dictionary_comparison_coercion_numeric (
1543+ /// Prefers numeric types over strings when both are present.
1544+ fn dictionary_comparison_coercion (
15401545 lhs_type : & DataType ,
15411546 rhs_type : & DataType ,
15421547 preserve_dictionaries : bool ,
@@ -1545,7 +1550,7 @@ fn dictionary_comparison_coercion_numeric(
15451550 lhs_type,
15461551 rhs_type,
15471552 preserve_dictionaries,
1548- comparison_coercion_numeric ,
1553+ comparison_coercion ,
15491554 )
15501555}
15511556
@@ -1584,36 +1589,27 @@ fn ree_comparison_coercion_generic(
15841589 }
15851590}
15861591
1587- /// Coercion rules for RunEndEncoded: the type that both lhs and rhs
1588- /// can be casted to for the purpose of a computation.
1592+ /// Coercion rules for RunEndEncoded in type unification contexts (see [`type_union_coercion`]).
15891593///
15901594/// Not all operators support REE, if `preserve_ree` is true
15911595/// REE will be preserved if possible
1592- fn ree_comparison_coercion (
1596+ fn ree_type_union_coercion (
15931597 lhs_type : & DataType ,
15941598 rhs_type : & DataType ,
15951599 preserve_ree : bool ,
15961600) -> Option < DataType > {
1597- ree_comparison_coercion_generic ( lhs_type, rhs_type, preserve_ree, comparison_coercion )
1601+ ree_comparison_coercion_generic ( lhs_type, rhs_type, preserve_ree, type_union_coercion )
15981602}
15991603
1600- /// Coercion rules for RunEndEncoded with numeric preference: similar to
1601- /// [`ree_comparison_coercion`] but uses [`comparison_coercion_numeric`]
1602- /// which prefers numeric types over strings when both are present.
1604+ /// Coercion rules for RunEndEncoded in comparison contexts.
16031605///
1604- /// This is used by [`comparison_coercion_numeric`] to maintain consistent
1605- /// numeric-preferring semantics when dealing with REE types.
1606- fn ree_comparison_coercion_numeric (
1606+ /// Prefers numeric types over strings when both are present.
1607+ fn ree_comparison_coercion (
16071608 lhs_type : & DataType ,
16081609 rhs_type : & DataType ,
16091610 preserve_ree : bool ,
16101611) -> Option < DataType > {
1611- ree_comparison_coercion_generic (
1612- lhs_type,
1613- rhs_type,
1614- preserve_ree,
1615- comparison_coercion_numeric,
1616- )
1612+ ree_comparison_coercion_generic ( lhs_type, rhs_type, preserve_ree, comparison_coercion)
16171613}
16181614
16191615/// Coercion rules for string concat.
@@ -1800,8 +1796,8 @@ fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
18001796pub fn like_coercion ( lhs_type : & DataType , rhs_type : & DataType ) -> Option < DataType > {
18011797 string_coercion ( lhs_type, rhs_type)
18021798 . or_else ( || binary_to_string_coercion ( lhs_type, rhs_type) )
1803- . or_else ( || dictionary_comparison_coercion ( lhs_type, rhs_type, false ) )
1804- . or_else ( || ree_comparison_coercion ( lhs_type, rhs_type, false ) )
1799+ . or_else ( || dictionary_type_union_coercion ( lhs_type, rhs_type, false ) )
1800+ . or_else ( || ree_type_union_coercion ( lhs_type, rhs_type, false ) )
18051801 . or_else ( || regex_null_coercion ( lhs_type, rhs_type) )
18061802 . or_else ( || null_coercion ( lhs_type, rhs_type) )
18071803}
@@ -1821,7 +1817,7 @@ fn regex_null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataT
18211817/// This is a union of string coercion rules and dictionary coercion rules
18221818pub fn regex_coercion ( lhs_type : & DataType , rhs_type : & DataType ) -> Option < DataType > {
18231819 string_coercion ( lhs_type, rhs_type)
1824- . or_else ( || dictionary_comparison_coercion ( lhs_type, rhs_type, false ) )
1820+ . or_else ( || dictionary_type_union_coercion ( lhs_type, rhs_type, false ) )
18251821 . or_else ( || regex_null_coercion ( lhs_type, rhs_type) )
18261822}
18271823
0 commit comments