@@ -54,20 +54,19 @@ object CometStringRepeat extends CometExpressionSerde[StringRepeat] {
/**
 * Shared serde for case-conversion expressions. The `function` argument is forwarded unchanged
 * to [[CometScalarFunction]].
 *
 * The support level is always reported as `Compatible()`; `convert` picks between the native
 * scalar function and the JVM codegen dispatcher based on
 * `CometConf.COMET_CASE_CONVERSION_ENABLED`.
 */
class CometCaseConversionBase[T <: Expression](function: String)
    extends CometScalarFunction[T](function) {

  override def getSupportLevel(expr: T): SupportLevel = Compatible()

  /**
   * Serializes `expr`, choosing the conversion path from the case-conversion config flag.
   *
   * @param expr
   *   the expression being converted
   * @param inputs
   *   input attributes, passed through to whichever path is selected
   * @param binding
   *   passed through to whichever path is selected
   * @return
   *   whatever the selected path returns
   */
  override def convert(expr: T, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = {
    val nativeOptIn = CometConf.COMET_CASE_CONVERSION_ENABLED.get()
    if (!nativeOptIn) {
      // Default path: hand the expression to the codegen dispatcher so that Spark's own
      // doGenCode executes inside the Comet pipeline, giving Spark-compatible results across
      // 3.4 / 3.5 / 4.0. Falls through to Spark when the dispatcher is disabled.
      CometScalaUDF.emitJvmCodegenDispatch(expr, inputs, binding)
    } else {
      // Opt-in path: the native scalar function is faster but does not match Spark for
      // locale-specific characters (e.g. Turkish dotted/dotless I).
      super.convert(expr, inputs, binding)
    }
  }
}
7372
@@ -86,20 +85,20 @@ object CometLength extends CometScalarFunction[Length]("length") {
8685
/**
 * Serde for Spark's `InitCap`, registered against the native `initcap` scalar function.
 *
 * The support level is always reported as `Compatible()`; `convert` only uses the native
 * function when incompatible behavior has been explicitly allowed for this expression.
 */
object CometInitCap extends CometScalarFunction[InitCap]("initcap") {

  override def getSupportLevel(expr: InitCap): SupportLevel = Compatible()

  /**
   * Serializes `expr`, choosing the conversion path from the per-expression allow-incompat
   * setting.
   *
   * @param expr
   *   the `InitCap` expression being converted
   * @param inputs
   *   input attributes, passed through to whichever path is selected
   * @param binding
   *   passed through to whichever path is selected
   * @return
   *   whatever the selected path returns
   */
  override def convert(expr: InitCap, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = {
    val allowNative = CometConf.isExprAllowIncompat(getExprConfigName(expr))
    if (!allowNative) {
      // Default path: hand the expression to the codegen dispatcher so that Spark's own
      // doGenCode executes inside the Comet pipeline, giving Spark-compatible results across
      // 3.4 / 3.5 / 4.0. Falls through to Spark when the dispatcher is disabled.
      CometScalaUDF.emitJvmCodegenDispatch(expr, inputs, binding)
    } else {
      // Opt-in path: the native function is faster but treats hyphen as a word separator
      // (e.g. `robert rose-smith` produces `Robert Rose-Smith` instead of Spark's
      // `Robert Rose-smith`). https://github.com/apache/datafusion-comet/issues/1052
      super.convert(expr, inputs, binding)
    }
  }
}
105104
0 commit comments