From c86d2b5572bc3b263963b337aa9d619bab34f79c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 29 Jun 2026 21:19:29 +0200 Subject: [PATCH 1/2] [SPARK-57750][SQL] Assign a name to the error condition _LEGACY_ERROR_TEMP_3084 and set its cause ### What changes were proposed in this pull request? Replace the legacy error condition `_LEGACY_ERROR_TEMP_3084`, raised when a Hive UDF/UDAF/UDTF wrapper class fails to instantiate during function resolution, with the descriptive condition `CANNOT_INSTANTIATE_HIVE_FUNCTION`, and attach the original failure as the exception cause. - Add `CANNOT_INSTANTIATE_HIVE_FUNCTION` (SQLSTATE 38000) to error-conditions.json and remove `_LEGACY_ERROR_TEMP_3084`. - Add `QueryCompilationErrors.cannotInstantiateHiveFunctionError(clazz, e)` that passes `cause = Some(e)` so the inner failure is preserved on the exception chain. - Update `HiveSessionStateBuilder.makeHiveFunctionExpression` to throw the new error and drop the manual `setStackTrace` (the cause now carries the inner stack trace). - Update `HiveUDFSuite` to assert via `checkError` on the new condition, and to read the inner failure via `getCause` where the wrapped message was previously asserted. ### Why are the changes needed? Part of the error-class migration (umbrella SPARK-37935). The legacy condition used a free-form `e` message parameter and did not attach the cause (`getCause` returned null), so callers and tests could not programmatically unwrap the inner failure. ### Does this PR introduce _any_ user-facing change? Yes. The error condition name and message change, and the original exception is now attached as the cause. Within the unreleased master branch only. ### How was this patch tested? By running: - `build/sbt "core/testOnly org.apache.spark.SparkThrowableSuite"` - `build/sbt "hive/testOnly org.apache.spark.sql.hive.execution.HiveUDFSuite"` ### Was this patch authored or co-authored using generative AI tooling? 
Generated-by: Cursor --- .../resources/error/error-conditions.json | 11 ++--- .../sql/errors/QueryCompilationErrors.scala | 7 +++ .../sql/hive/HiveSessionStateBuilder.scala | 10 +---- .../sql/hive/execution/HiveUDFSuite.scala | 44 ++++++++++++------- 4 files changed, 43 insertions(+), 29 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 10bae53a5a7bf..2d88fb8a8be49 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -498,6 +498,12 @@ ], "sqlState" : "22546" }, + "CANNOT_INSTANTIATE_HIVE_FUNCTION" : { + "message" : [ + "Cannot instantiate the Hive UDF/UDAF/UDTF wrapper class <clazz>. Check that the function arguments and their types are supported." + ], + "sqlState" : "38000" + }, "CANNOT_INVOKE_IN_TRANSFORMATIONS" : { "message" : [ "Dataset transformations and actions can only be invoked by the driver, not inside of other Dataset transformations; for example, dataset1.map(x => dataset2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the dataset1.map transformation. For more information, see SPARK-28702." @@ -11314,11 +11320,6 @@ "Unable to infer the schema. The schema specification is required to create the table ." 
] }, - "_LEGACY_ERROR_TEMP_3084" : { - "message" : [ - "No handler for UDF/UDAF/UDTF '<clazz>': <e>" - ] - }, "_LEGACY_ERROR_TEMP_3086" : { "message" : [ "Cannot persist into Hive metastore as table property keys may not start with 'spark.sql.': " diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index f26c32a07059d..7b393639ea39c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -4602,6 +4602,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("invalidClass" -> invalidClass)) } + def cannotInstantiateHiveFunctionError(clazz: String, e: Throwable): Throwable = { + new AnalysisException( + errorClass = "CANNOT_INSTANTIATE_HIVE_FUNCTION", + messageParameters = Map("clazz" -> clazz), + cause = Some(e)) + } + def unsupportedParameterExpression(expr: Expression): Throwable = { new AnalysisException( errorClass = "UNSUPPORTED_EXPR_FOR_PARAMETER", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 0fbc41492e00a..44cfd339e15c9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -24,7 +24,6 @@ import scala.util.control.NonFatal import org.apache.hadoop.hive.ql.exec.{UDAF, UDF} import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, GenericUDF, GenericUDTF} -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{Analyzer, EvalSubqueriesForTimeTravel, InvokeProcedures, ReplaceCharWithVarchar, ResolveDataSource, ResolveEventTimeWatermark, 
ResolveExecuteImmediate, ResolveMetricView, ResolveSessionCatalog, ResolveTranspose} import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension import org.apache.spark.sql.catalyst.catalog.{ExternalCatalogWithListener, InvalidUDFClassException} @@ -246,13 +245,8 @@ object HiveUDFExpressionBuilder extends SparkUDFExpressionBuilder { case i: InvocationTargetException => i.getCause case o => o } - val analysisException = new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_3084", - messageParameters = Map( - "clazz" -> clazz.getCanonicalName, - "e" -> e.toString)) - analysisException.setStackTrace(e.getStackTrace) - throw analysisException + throw QueryCompilationErrors.cannotInstantiateHiveFunctionError( + clazz.getCanonicalName, e) } udfExpr.getOrElse { throw QueryCompilationErrors.invalidUDFClassError(clazz.getCanonicalName) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 6a44e17296c0c..8b73cacd7cab5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -290,7 +290,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { sql(s"CREATE TEMPORARY FUNCTION testUDFRawList " + s"AS '${classOf[UDFRawList].getName}'") val err = intercept[AnalysisException](sql("SELECT testUDFRawList(s) FROM inputTable")) - assert(err.getMessage.contains( + assert(err.getCause.getMessage.contains( "Raw list type in java is unsupported because Spark cannot infer the element type.")) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFRawList") @@ -304,7 +304,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { sql(s"CREATE TEMPORARY FUNCTION testUDFRawMap " + s"AS '${classOf[UDFRawMap].getName}'") val err = intercept[AnalysisException](sql("SELECT testUDFRawMap(s) FROM inputTable")) - 
assert(err.getMessage.contains( + assert(err.getCause.getMessage.contains( "Raw map type in java is unsupported because Spark cannot infer key and value types.")) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFRawMap") @@ -318,7 +318,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { sql(s"CREATE TEMPORARY FUNCTION testUDFWildcardList " + s"AS '${classOf[UDFWildcardList].getName}'") val err = intercept[AnalysisException](sql("SELECT testUDFWildcardList(s) FROM inputTable")) - assert(err.getMessage.contains( + assert(err.getCause.getMessage.contains( "Collection types with wildcards (e.g. List or Map) are unsupported " + "because Spark cannot infer the data type for these type parameters.")) @@ -414,10 +414,16 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { def testErrorMsgForFunc(funcName: String, className: String): Unit = { withUserDefinedFunction(funcName -> true) { sql(s"CREATE TEMPORARY FUNCTION $funcName AS '$className'") - val message = intercept[AnalysisException] { - sql(s"SELECT $funcName() FROM testUDF") - }.getMessage - assert(message.contains(s"No handler for UDF/UDAF/UDTF '$className'")) + checkError( + exception = intercept[AnalysisException] { + sql(s"SELECT $funcName() FROM testUDF") + }, + condition = "CANNOT_INSTANTIATE_HIVE_FUNCTION", + parameters = Map("clazz" -> className), + context = ExpectedContext( + fragment = s"$funcName()", + start = 7, + stop = 6 + s"$funcName()".length)) } } @@ -678,15 +684,21 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { sql("SELECT testArraySum(array(1, 1.1, 1.2))"), Seq(Row(3.3))) - val msg = intercept[AnalysisException] { - sql("SELECT testArraySum(1)") - }.getMessage - assert(msg.contains(s"No handler for UDF/UDAF/UDTF '${classOf[ArraySumUDF].getName}'")) - - val msg2 = intercept[AnalysisException] { - sql("SELECT testArraySum(1, 2)") - }.getMessage - assert(msg2.contains(s"No handler for UDF/UDAF/UDTF '${classOf[ArraySumUDF].getName}'")) + checkError( + 
exception = intercept[AnalysisException] { + sql("SELECT testArraySum(1)") + }, + condition = "CANNOT_INSTANTIATE_HIVE_FUNCTION", + parameters = Map("clazz" -> classOf[ArraySumUDF].getCanonicalName), + context = ExpectedContext(fragment = "testArraySum(1)", start = 7, stop = 21)) + + checkError( + exception = intercept[AnalysisException] { + sql("SELECT testArraySum(1, 2)") + }, + condition = "CANNOT_INSTANTIATE_HIVE_FUNCTION", + parameters = Map("clazz" -> classOf[ArraySumUDF].getCanonicalName), + context = ExpectedContext(fragment = "testArraySum(1, 2)", start = 7, stop = 24)) } } From c7ad9d9f04fdc53f45973beab0567d366b1c684e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 30 Jun 2026 08:46:19 +0200 Subject: [PATCH 2/2] [SPARK-57750][SQL][TESTS] Assert inner Hive UDF failures via the attached cause Follow-up: use checkError on the inner AnalysisException (the now-attached cause) for the raw/wildcard collection cases in HiveUDFSuite, and read the inner Hive SemanticException message via getCause in UDFSuite (SPARK-21318), which previously relied on the inner failure being embedded in the wrapper exception's message. 
--- .../org/apache/spark/sql/hive/UDFSuite.scala | 2 +- .../sql/hive/execution/HiveUDFSuite.scala | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala index ef8ae7a408fb8..d8c0848ccae33 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala @@ -204,7 +204,7 @@ class UDFSuite sql(s"SELECT $functionName(value) from $testTableName") } - assert(e.getMessage.contains("Can not get an evaluator of the empty UDAF")) + assert(e.getCause.getMessage.contains("Can not get an evaluator of the empty UDAF")) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 8b73cacd7cab5..35ae3ea29d6d6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -290,8 +290,10 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { sql(s"CREATE TEMPORARY FUNCTION testUDFRawList " + s"AS '${classOf[UDFRawList].getName}'") val err = intercept[AnalysisException](sql("SELECT testUDFRawList(s) FROM inputTable")) - assert(err.getCause.getMessage.contains( - "Raw list type in java is unsupported because Spark cannot infer the element type.")) + checkError( + exception = err.getCause.asInstanceOf[AnalysisException], + condition = "_LEGACY_ERROR_TEMP_3090", + parameters = Map.empty) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFRawList") hiveContext.reset() @@ -304,8 +306,10 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { sql(s"CREATE TEMPORARY FUNCTION testUDFRawMap " + s"AS '${classOf[UDFRawMap].getName}'") val err = intercept[AnalysisException](sql("SELECT testUDFRawMap(s) 
FROM inputTable")) - assert(err.getCause.getMessage.contains( - "Raw map type in java is unsupported because Spark cannot infer key and value types.")) + checkError( + exception = err.getCause.asInstanceOf[AnalysisException], + condition = "_LEGACY_ERROR_TEMP_3091", + parameters = Map.empty) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFRawMap") hiveContext.reset() @@ -318,9 +322,10 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { sql(s"CREATE TEMPORARY FUNCTION testUDFWildcardList " + s"AS '${classOf[UDFWildcardList].getName}'") val err = intercept[AnalysisException](sql("SELECT testUDFWildcardList(s) FROM inputTable")) - assert(err.getCause.getMessage.contains( - "Collection types with wildcards (e.g. List or Map) are unsupported " + - "because Spark cannot infer the data type for these type parameters.")) + checkError( + exception = err.getCause.asInstanceOf[AnalysisException], + condition = "_LEGACY_ERROR_TEMP_3092", + parameters = Map.empty) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFWildcardList") hiveContext.reset()