diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala index 1ae90e1845..6c4a92f312 100644 --- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala +++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala @@ -290,21 +290,22 @@ object CometSparkSessionExtensions extends Logging { * @return * `node` with fallback reasons attached (as a side effect on its tag map). */ - def withInfo[T <: TreeNode[_]](node: T, info: String, exprs: T*): T = { + def withFallbackReason[T <: TreeNode[_]](node: T, info: String, exprs: T*): T = { // support existing approach of passing in multiple infos in a newline-delimited string val infoSet = if (info == null || info.isEmpty) { Set.empty[String] } else { info.split("\n").toSet } - withInfos(node, infoSet, exprs: _*) + withFallbackReasons(node, infoSet, exprs: _*) } /** * Record one or more fallback reasons on a `TreeNode` and roll up reasons from any child nodes. - * This is the set-valued form of [[withInfo]]; see that overload for the full contract. + * This is the set-valued form of [[withFallbackReason]]; see that overload for the full + * contract. * - * Reasons are accumulated (never overwritten) on the node's `EXTENSION_INFO` tag and are + * Reasons are accumulated (never overwritten) on the node's `FALLBACK_REASONS` tag and are * surfaced in extended explain output. When `COMET_LOG_FALLBACK_REASONS` is enabled, each new * reason is also emitted as a warning. * @@ -320,16 +321,16 @@ object CometSparkSessionExtensions extends Logging { * @return * `node` with fallback reasons attached (as a side effect on its tag map). */ - def withInfos[T <: TreeNode[_]](node: T, info: Set[String], exprs: T*): T = { + def withFallbackReasons[T <: TreeNode[_]](node: T, info: Set[String], exprs: T*): T = { if (CometConf.COMET_LOG_FALLBACK_REASONS.get()) { for (reason <- info) { logWarning(s"Comet cannot accelerate ${node.getClass.getSimpleName} because: $reason") } } - val existingNodeInfos = node.getTagValue(CometExplainInfo.EXTENSION_INFO) + val existingNodeInfos = node.getTagValue(CometExplainInfo.FALLBACK_REASONS) val newNodeInfo = (existingNodeInfos ++ exprs - .flatMap(_.getTagValue(CometExplainInfo.EXTENSION_INFO))).flatten.toSet - node.setTagValue(CometExplainInfo.EXTENSION_INFO, newNodeInfo ++ info) + .flatMap(_.getTagValue(CometExplainInfo.FALLBACK_REASONS))).flatten.toSet + node.setTagValue(CometExplainInfo.FALLBACK_REASONS, newNodeInfo ++ info) node } @@ -347,17 +348,17 @@ object CometSparkSessionExtensions extends Logging { * @return * `node` with the rolled-up reasons attached (as a side effect on its tag map). */ - def withInfo[T <: TreeNode[_]](node: T, exprs: T*): T = { - withInfos(node, Set.empty, exprs: _*) + def withFallbackReason[T <: TreeNode[_]](node: T, exprs: T*): T = { + withFallbackReasons(node, Set.empty, exprs: _*) } /** - * True if any fallback reason has been recorded on `node` (via [[withInfo]] / [[withInfos]]). - * Callers that need to short-circuit when a prior rule pass has already decided a node falls - * back can use this as the sticky signal. + * True if any fallback reason has been recorded on `node` (via [[withFallbackReason]] / + * [[withFallbackReasons]]). Callers that need to short-circuit when a prior rule pass has + * already decided a node falls back can use this as the sticky signal. */ - def hasExplainInfo(node: TreeNode[_]): Boolean = { - node.getTagValue(CometExplainInfo.EXTENSION_INFO).exists(_.nonEmpty) + def hasFallbackReason(node: TreeNode[_]): Boolean = { + node.getTagValue(CometExplainInfo.FALLBACK_REASONS).exists(_.nonEmpty) } } diff --git a/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala b/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala index d30a1fe788..592b1955f2 100644 --- a/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala +++ b/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala @@ -50,15 +50,17 @@ class ExtendedExplainInfo extends ExtendedExplainGenerator { } def getFallbackReasons(plan: SparkPlan): Seq[String] = { - extensionInfo(plan).toSeq.sorted + fallbackReasons(plan).toSeq.sorted } - private[comet] def extensionInfo(node: TreeNode[_]): Set[String] = { + private[comet] def fallbackReasons(node: TreeNode[_]): Set[String] = { var info = mutable.Seq[String]() val sorted = sortup(node) sorted.foreach { p => val all: Set[String] = - getActualPlan(p).getTagValue(CometExplainInfo.EXTENSION_INFO).getOrElse(Set.empty[String]) + getActualPlan(p) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .getOrElse(Set.empty[String]) for (s <- all) { info = info :+ s } @@ -120,7 +122,7 @@ class ExtendedExplainInfo extends ExtendedExplainGenerator { outString.append(if (lastChildren.last) "+- " else ":- ") } - val tagValue = node.getTagValue(CometExplainInfo.EXTENSION_INFO) + val tagValue = node.getTagValue(CometExplainInfo.FALLBACK_REASONS) val str = if (tagValue.nonEmpty) { s" ${node.nodeName} [COMET: ${tagValue.get.mkString(", ")}]" } else { @@ -212,7 +214,7 @@ object CometCoverageStats { } object CometExplainInfo { - val EXTENSION_INFO = new TreeNodeTag[Set[String]]("CometExtensionInfo") + val FALLBACK_REASONS = new TreeNodeTag[Set[String]]("CometFallbackReasons") def getActualPlan(node: TreeNode[_]): TreeNode[_] = { node match { diff --git a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala index 042fd9ced3..6e77182cac 100644 --- a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala +++ b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala @@ -103,8 +103,8 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim { /** * Plan-time predicate. `None` greenlights the serde to emit the codegen proto; `Some(reason)` - * forces a Spark fallback (typically `withInfo(...) + None`) so the operator falls back cleanly - * rather than crashing the Janino compile at execute time. + * forces a Spark fallback (typically `withFallbackReason(...) + None`) so the operator falls + * back cleanly rather than crashing the Janino compile at execute time. * * Checks every `BoundReference`'s data type and the root `expr.dataType` against * [[isSupportedDataType]], rejects aggregates / generators / `CodegenFallback` (other than diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala index 400229a402..8ecfdfe49c 100644 --- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala +++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, DataType, DataTypes, DecimalType, NullType, StructType, TimestampNTZType, TimestampType} import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.{isSpark40Plus, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{isSpark40Plus, withFallbackReason} import org.apache.comet.serde.{CometExpressionSerde, Compatible, ExprOuterClass, Incompatible, SupportLevel, Unsupported} import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, serializeDataType} @@ -81,7 +81,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { if (childExpr.isDefined) { castToProto(cast, cast.timeZoneId, cast.dataType, childExpr.get, cometEvalMode) } else { - withInfo(cast, cast.child) + withFallbackReason(cast, cast.child) None } } @@ -131,7 +131,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { .setCast(castBuilder) .build()) case _ => - withInfo(expr, s"Unsupported datatype in castToProto: $dt") + withFallbackReason(expr, s"Unsupported datatype in castToProto: $dt") None } } diff --git a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala index aeb7db40ad..d116d2f407 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala @@ -328,8 +328,8 @@ case class CometExecRule(session: SparkSession) } else { // copy fallback reasons to the original plan newPlan - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => withInfos(plan, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => withFallbackReasons(plan, reasons)) // return the original plan plan } @@ -382,8 +382,8 @@ case class CometExecRule(session: SparkSession) // reasons. // 3. The operator has children that could not be converted, so execution // has already fallen back to Spark. - if (op.children.forall(_.isInstanceOf[CometNativeExec]) && !hasExplainInfo(op)) { - withInfo(op, s"${op.nodeName} is not supported") + if (op.children.forall(_.isInstanceOf[CometNativeExec]) && !hasFallbackReason(op)) { + withFallbackReason(op, s"${op.nodeName} is not supported") } else { op } @@ -587,7 +587,7 @@ case class CometExecRule(session: SparkSession) // config is enabled) if (CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.get()) { val info = new ExtendedExplainInfo() - if (info.extensionInfo(newPlan).nonEmpty) { + if (info.fallbackReasons(newPlan).nonEmpty) { logWarning( "Comet cannot execute some parts of this plan natively " + s"(set ${CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key}=false " + @@ -693,7 +693,9 @@ case class CometExecRule(session: SparkSession) case other => Seq(other) } if (!dataProducingChildren.forall(_.isInstanceOf[CometNativeExec])) { - withInfo(op, "Cannot perform native operation because input is not in Arrow format") + withFallbackReason( + op, + "Cannot perform native operation because input is not in Arrow format") return None } } @@ -721,7 +723,7 @@ case class CometExecRule(session: SparkSession) if (handler.enabledConfig.forall(_.get(op.conf))) { handler.getSupportLevel(op) match { case Unsupported(notes) => - withInfo(op, notes.getOrElse("")) + withFallbackReason(op, notes.getOrElse("")) false case Incompatible(notes) => val allowIncompat = CometConf.isOperatorAllowIncompat(opName) @@ -735,7 +737,7 @@ case class CometExecRule(session: SparkSession) true } else { val optionalNotes = notes.map(str => s" ($str)").getOrElse("") - withInfo( + withFallbackReason( op, s"$opName is not fully compatible with Spark$optionalNotes. " + s"To enable it anyway, set $incompatConf=true. " + @@ -749,7 +751,7 @@ case class CometExecRule(session: SparkSession) true } } else { - withInfo( + withFallbackReason( op, s"Native support for operator $opName is disabled. " + s"Set ${handler.enabledConfig.get.key}=true to enable it.") diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala index 7601fa1c6b..6dfcdcff25 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.types._ import org.apache.comet.{CometConf, DataTypeSupport} import org.apache.comet.CometConf._ -import org.apache.comet.CometSparkSessionExtensions.{isCometLoaded, isSpark35Plus, withInfo, withInfos} +import org.apache.comet.CometSparkSessionExtensions.{isCometLoaded, isSpark35Plus, withFallbackReason, withFallbackReasons} import org.apache.comet.DataTypeSupport.isComplexType import org.apache.comet.iceberg.{CometIcebergNativeScanMetadata, IcebergReflection} import org.apache.comet.objectstore.NativeConfig @@ -119,13 +119,13 @@ case class CometScanRule(session: SparkSession) // Tagged by CometSpark34AqeDppFallbackRule on Spark < 3.5 to keep a peer scan // Spark-native for canonical symmetry in SMJ self-joins (SPARK-32509). case scan if scan.getTagValue(CometScanRule.SKIP_COMET_SCAN_TAG).isDefined => - withInfo(scan, "AQE DPP region fallback (Spark < 3.5)") + withFallbackReason(scan, "AQE DPP region fallback (Spark < 3.5)") case scan if !CometConf.COMET_NATIVE_SCAN_ENABLED.get(conf) => - withInfo(scan, "Comet Scan is not enabled") + withFallbackReason(scan, "Comet Scan is not enabled") case scan if hasMetadataCol(scan) => - withInfo(scan, "Metadata column is not supported") + withFallbackReason(scan, "Metadata column is not supported") // data source V1 case scanExec: FileSourceScanExec => @@ -134,7 +134,7 @@ case class CometScanRule(session: SparkSession) // data source V2 case scanExec: BatchScanExec => if (isIcebergMetadataTable(scanExec)) { - withInfo(scanExec, "Iceberg Metadata tables are not supported") + withFallbackReason(scanExec, "Iceberg Metadata tables are not supported") } else { transformV2Scan(scanExec) } @@ -157,13 +157,13 @@ case class CometScanRule(session: SparkSession) // On 3.5+, CometPlanAdaptiveDynamicPruningFilters rewrites SABs directly and this fallback // is not needed. if (!isSpark35Plus && scanExec.partitionFilters.exists(isAqeDynamicPruningFilter)) { - return withInfo(scanExec, "AQE Dynamic Partition Pruning requires Spark 3.5+") + return withFallbackReason(scanExec, "AQE Dynamic Partition Pruning requires Spark 3.5+") } scanExec.relation match { case r: HadoopFsRelation => if (!CometScanExec.isFileFormatSupported(r.fileFormat)) { - return withInfo(scanExec, s"Unsupported file format ${r.fileFormat}") + return withFallbackReason(scanExec, s"Unsupported file format ${r.fileFormat}") } val hadoopConf = r.sparkSession.sessionState.newHadoopConfWithOptions(r.options) @@ -176,7 +176,7 @@ case class CometScanRule(session: SparkSession) // Spark already converted these to Java-native types, so we can't check SQL types. // ArrayBasedMapData, GenericInternalRow, GenericArrayData correspond to maps, structs, // and arrays respectively. - withInfo( + withFallbackReason( scanExec, "Full native scan disabled because default values for nested types are not supported") return scanExec @@ -185,7 +185,7 @@ case class CometScanRule(session: SparkSession) nativeScan(plan, session, scanExec, r, hadoopConf).getOrElse(scanExec) case _ => - withInfo(scanExec, s"Unsupported relation ${scanExec.relation}") + withFallbackReason(scanExec, s"Unsupported relation ${scanExec.relation}") } } @@ -196,7 +196,9 @@ case class CometScanRule(session: SparkSession) r: HadoopFsRelation, hadoopConf: Configuration): Option[SparkPlan] = { if (!COMET_EXEC_ENABLED.get()) { - withInfo(scanExec, s"Native Parquet scan requires ${COMET_EXEC_ENABLED.key} to be enabled") + withFallbackReason( + scanExec, + s"Native Parquet scan requires ${COMET_EXEC_ENABLED.key} to be enabled") return None } // Disabling the vectorized reader opts into parquet-mr's permissive behavior @@ -205,7 +207,7 @@ case class CometScanRule(session: SparkSession) // replace the scan via COMET_SCAN_ALLOW_DISABLED_PARQUET_VECTORIZED_READER. if (!conf.parquetVectorizedReaderEnabled && !COMET_SCAN_ALLOW_DISABLED_PARQUET_VECTORIZED_READER.get()) { - withInfo( + withFallbackReason( scanExec, "Native Parquet scan is incompatible with " + s"${SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key}=false; set " + @@ -216,11 +218,11 @@ case class CometScanRule(session: SparkSession) return None } if (encryptionEnabled(hadoopConf) && !isEncryptionConfigSupported(hadoopConf)) { - withInfo(scanExec, "Native Parquet scan does not support encryption") + withFallbackReason(scanExec, "Native Parquet scan does not support encryption") return None } if (scanExec.fileConstantMetadataColumns.nonEmpty) { - withInfo(scanExec, "Native DataFusion scan does not support metadata columns") + withFallbackReason(scanExec, "Native DataFusion scan does not support metadata columns") return None } // input_file_name, input_file_block_start, and input_file_block_length read from @@ -231,14 +233,14 @@ case class CometScanRule(session: SparkSession) case _: InputFileName | _: InputFileBlockStart | _: InputFileBlockLength => true case _ => false }))) { - withInfo( + withFallbackReason( scanExec, "Native DataFusion scan is not compatible with input_file_name, " + "input_file_block_start, or input_file_block_length") return None } if (ShimFileFormat.findRowIndexColumnIndexInSchema(scanExec.requiredSchema) >= 0) { - withInfo(scanExec, "Native DataFusion scan does not support row index generation") + withFallbackReason(scanExec, "Native DataFusion scan does not support row index generation") return None } if (!isSchemaSupported(scanExec, r)) { @@ -288,7 +290,7 @@ case class CometScanRule(session: SparkSession) scanExec.clone().asInstanceOf[BatchScanExec], runtimeFilters = scanExec.runtimeFilters) } else { - withInfos(scanExec, fallbackReasons.toSet) + withFallbackReasons(scanExec, fallbackReasons.toSet) } // Iceberg scan - detected by class name. SparkStagedScan covers reads issued by @@ -301,13 +303,13 @@ case class CometScanRule(session: SparkSession) if (!COMET_ICEBERG_NATIVE_ENABLED.get()) { fallbackReasons += "Native Iceberg scan disabled because " + s"${COMET_ICEBERG_NATIVE_ENABLED.key} is not enabled" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } if (!COMET_EXEC_ENABLED.get()) { fallbackReasons += "Native Iceberg scan disabled because " + s"${COMET_EXEC_ENABLED.key} is not enabled" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } val typeChecker = CometScanTypeChecker() @@ -396,7 +398,7 @@ case class CometScanRule(session: SparkSession) case Some(m) => m case None => fallbackReasons += "Failed to extract Iceberg metadata via reflection" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } // Now perform all validation using the pre-extracted metadata @@ -439,7 +441,7 @@ case class CometScanRule(session: SparkSession) case e: Exception => fallbackReasons += "Iceberg reflection failure: Could not validate " + s"FileScanTasks: ${e.getMessage}" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } // Check if all files are Parquet format and use supported filesystem schemes @@ -639,11 +641,11 @@ case class CometScanRule(session: SparkSession) runtimeFilters = scanExec.runtimeFilters, nativeIcebergScanMetadata = Some(metadata)) } else { - withInfos(scanExec, fallbackReasons.toSet) + withFallbackReasons(scanExec, fallbackReasons.toSet) } case other => - withInfo( + withFallbackReason( scanExec, s"Unsupported scan: ${other.getClass.getName}. " + "Comet Scan only supports Parquet and Iceberg Parquet file formats") @@ -674,7 +676,7 @@ case class CometScanRule(session: SparkSession) val schemaSupported = typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons) if (!schemaSupported) { - withInfo( + withFallbackReason( scanExec, s"Unsupported schema ${scanExec.requiredSchema}: ${fallbackReasons.mkString(", ")}") return false @@ -682,7 +684,7 @@ case class CometScanRule(session: SparkSession) val partitionSchemaSupported = typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons) if (!partitionSchemaSupported) { - withInfo( + withFallbackReason( scanExec, s"Unsupported partitioning schema ${scanExec.requiredSchema}: " + fallbackReasons.mkString(", ")) diff --git a/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala b/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala index 6a408ee745..2864eea4ed 100644 --- a/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala +++ b/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.execution.{SortExec, SparkPlan} import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason /** * Adapted from equivalent rule in Apache Gluten. @@ -69,7 +69,7 @@ object RewriteJoin extends JoinSelectionHelper { getSmjBuildSide(smj) match { case Some(BuildRight) if smj.joinType == LeftSemi => // LeftSemi https://github.com/apache/datafusion-comet/issues/2667 - withInfo( + withFallbackReason( smj, "Cannot rewrite SortMergeJoin to HashJoin: " + s"BuildRight with ${smj.joinType} is not supported") diff --git a/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala b/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala index a13b685ea6..75e0f532e3 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, BloomFilterMightContain} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal object CometBloomFilterMightContain extends CometExpressionSerde[BloomFilterMightContain] { @@ -45,7 +45,7 @@ object CometBloomFilterMightContain extends CometExpressionSerde[BloomFilterMigh .setBloomFilterMightContain(builder) .build()) } else { - withInfo(expr, bloomFilter, value) + withFallbackReason(expr, bloomFilter, value) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala b/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala index 852e80ae44..a1d5be84ff 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, import org.apache.spark.sql.types.BinaryType import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.codegen.CometBatchKernelCodegen import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, serializeDataType} @@ -61,16 +61,17 @@ object CometScalaUDF extends CometExpressionSerde[ScalaUDF] { * Arrow-direct codegen dispatcher. The dispatcher will Janino-compile `expr.doGenCode` into a * batch kernel on first invocation per task. * - * Returns `None` (with `withInfo` tagging the reason) when the dispatcher is disabled via - * [[CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED]] or when [[CometBatchKernelCodegen.canHandle]] - * refuses the expression tree. Callers should treat `None` as a clean Spark-fallback signal. + * Returns `None` (with `withFallbackReason` tagging the reason) when the dispatcher is disabled + * via [[CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED]] or when + * [[CometBatchKernelCodegen.canHandle]] refuses the expression tree. Callers should treat + * `None` as a clean Spark-fallback signal. */ def emitJvmCodegenDispatch( expr: Expression, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { if (!CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED.get()) { - withInfo( + withFallbackReason( expr, s"${CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED.key}=false; expression has no native " + "path so the plan falls back to Spark") @@ -82,10 +83,11 @@ object CometScalaUDF extends CometExpressionSerde[ScalaUDF] { val attrs = expr.collect { case a: AttributeReference => a }.distinct val boundExpr = BindReferences.bindReference(expr, AttributeSeq(attrs)) - // Gate at plan time. Surface the reason via withInfo rather than crashing Janino at execute. + // Gate at plan time. Surface the reason via withFallbackReason rather than crashing Janino + // at execute. CometBatchKernelCodegen.canHandle(boundExpr) match { case Some(reason) => - withInfo(expr, reason) + withFallbackReason(expr, reason) return None case None => } @@ -133,7 +135,7 @@ class CometCodegenDispatch[T <: Expression] extends CometExpressionSerde[T] { // Intentionally no getCompatibleNotes override: the docs generator emits compat notes under // a heading that promises "no additional configuration required". The dispatcher flag is a // global concern documented elsewhere; tagging each expression here would contradict the - // heading. When the flag is off, `convert` returns None with a clear withInfo reason that + // heading. When the flag is off, `convert` returns None with a clear fallback reason that // shows up in EXPLAIN, which is the right place for that signal. override def convert(expr: T, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = CometScalaUDF.emitJvmCodegenDispatch(expr, inputs, binding) diff --git a/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala b/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala index aa3bf775fb..2e056ae886 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala @@ -22,13 +22,13 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.comet.serde.ExprOuterClass.Expr -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} /** Serde for scalar function. */ case class CometScalarFunction[T <: Expression](name: String) extends CometExpressionSerde[T] { override def convert(expr: T, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { val childExpr = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto(name, childExpr: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala b/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala index b1f5a28271..329c91f49e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.execution.ScalarSubquery -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{serializeDataType, supportedDataType} object CometScalarSubquery extends CometExpressionSerde[ScalarSubquery] { @@ -33,7 +33,9 @@ object CometScalarSubquery extends CometExpressionSerde[ScalarSubquery] { if (supportedDataType(expr.dataType)) { val dataType = serializeDataType(expr.dataType) if (dataType.isEmpty) { - withInfo(expr, s"Failed to serialize datatype ${expr.dataType} for scalar subquery") + withFallbackReason( + expr, + s"Failed to serialize datatype ${expr.dataType} for scalar subquery") return None } @@ -43,7 +45,7 @@ object CometScalarSubquery extends CometExpressionSerde[ScalarSubquery] { .setDatatype(dataType.get) Some(ExprOuterClass.Expr.newBuilder().setSubquery(builder).build()) } else { - withInfo(expr, s"Unsupported data type: ${expr.dataType}") + withFallbackReason(expr, s"Unsupported data type: ${expr.dataType}") None } diff --git a/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala b/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala index 3647645109..3dcd67a65d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Descending, NullsFirst, NullsLast, SortOrder} import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal object CometSortOrder extends CometExpressionSerde[SortOrder] { @@ -73,7 +73,7 @@ object CometSortOrder extends CometExpressionSerde[SortOrder] { .setSortOrder(sortOrderBuilder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 2fdc197956..0bdc02a790 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions._ import org.apache.comet.parquet.CometParquetUtils import org.apache.comet.serde.ExprOuterClass.{AggExpr, Expr, ScalarFunc} @@ -560,7 +560,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { if (aggExpr.isDistinct && aggExpr.aggregateFunction.children.length > 1 && aggExpr.aggregateFunction.prettyName != "count") { - withInfo(aggExpr, s"Multi-column distinct aggregate not supported for: $aggExpr") + withFallbackReason(aggExpr, s"Multi-column distinct aggregate not supported for: $aggExpr") return None } @@ -571,7 +571,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { val aggHandler = handler.asInstanceOf[CometAggregateExpressionSerde[AggregateFunction]] val exprConfName = aggHandler.getExprConfigName(fn) if (!CometConf.isExprEnabled(exprConfName)) { - withInfo( + withFallbackReason( aggExpr, "Expression support is disabled. Set " + s"${CometConf.getExprEnabledConfigKey(exprConfName)}=true to enable it.") @@ -579,7 +579,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { } aggHandler.getSupportLevel(fn) match { case Unsupported(notes) => - withInfo(fn, notes.getOrElse("")) + withFallbackReason(fn, notes.getOrElse("")) None case Incompatible(notes) => val exprAllowIncompat = CometConf.isExprAllowIncompat(exprConfName) @@ -593,7 +593,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { aggHandler.convert(aggExpr, fn, inputs, binding, conf) } else { val optionalNotes = notes.map(str => s" ($str)").getOrElse("") - withInfo( + withFallbackReason( fn, s"$fn is not fully compatible with Spark$optionalNotes. To enable it anyway, " + s"set ${CometConf.getExprAllowIncompatConfigKey(exprConfName)}=true. " + @@ -607,7 +607,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { aggHandler.convert(aggExpr, fn, inputs, binding, conf) } case _ => - withInfo( + withFallbackReason( aggExpr, s"unsupported Spark aggregate function: ${fn.prettyName}", fn.children: _*) @@ -624,7 +624,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { if (aggExpr.filter.isDefined && aggExpr.mode == Partial) { val filterProto = exprToProto(aggExpr.filter.get, inputs, binding) if (filterProto.isEmpty) { - withInfo(aggExpr, aggExpr.filter.get) + withFallbackReason(aggExpr, aggExpr.filter.get) return None } builder.setFilter(filterProto.get) @@ -696,7 +696,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { def convert[T <: Expression](expr: T, handler: CometExpressionSerde[T]): Option[Expr] = { val exprConfName = handler.getExprConfigName(expr) if (!CometConf.isExprEnabled(exprConfName)) { - withInfo( + withFallbackReason( expr, "Expression support is disabled. Set " + s"${CometConf.getExprEnabledConfigKey(exprConfName)}=true to enable it.") @@ -704,7 +704,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { } handler.getSupportLevel(expr) match { case Unsupported(notes) => - withInfo(expr, notes.getOrElse("")) + withFallbackReason(expr, notes.getOrElse("")) None case Incompatible(notes) => val exprAllowIncompat = CometConf.isExprAllowIncompat(exprConfName) @@ -718,7 +718,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { handler.convert(expr, inputs, binding) } else { val optionalNotes = notes.map(str => s" ($str)").getOrElse("") - withInfo( + withFallbackReason( expr, s"$expr is not fully compatible with Spark$optionalNotes. To enable it anyway, " + s"set ${CometConf.getExprAllowIncompatConfigKey(exprConfName)}=true. " + @@ -746,7 +746,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { case Some(handler) => convert(expr, handler.asInstanceOf[CometExpressionSerde[Expression]]) case _ => - withInfo(expr, s"${expr.prettyName} is not supported", expr.children: _*) + withFallbackReason(expr, s"${expr.prettyName} is not supported", expr.children: _*) None } }) @@ -797,7 +797,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { .newBuilder(), inner).build()) } else { - withInfo(expr, child) + withFallbackReason(expr, child) None } } @@ -827,7 +827,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { .newBuilder(), inner).build()) } else { - withInfo(expr, left, right) + withFallbackReason(expr, left, right) None } } @@ -864,14 +864,14 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { Some(ExprOuterClass.Expr.newBuilder().setScalarFunc(builder).build()) } - // Utility method. Adds explain info if the result of calling exprToProto is None - def optExprWithInfo( + // Utility method. Adds fallback reason if the result of calling exprToProto is None + def optExprWithFallbackReason( optExpr: Option[Expr], expr: Expression, childExpr: Expression*): Option[Expr] = { optExpr match { case None => - withInfo(expr, childExpr: _*) + withFallbackReason(expr, childExpr: _*) None case o => o } @@ -908,7 +908,9 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { case _ => supportedScalarSortElementType(sortOrder.head.dataType) } if (!canSort) { - withInfo(op, s"Sort on single column of type ${sortOrder.head.dataType} is not supported") + withFallbackReason( + op, + s"Sort on single column of type ${sortOrder.head.dataType} is not supported") false } else { true diff --git a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala index a9ae740900..48bd555cf1 100644 --- a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types.{ByteType, DataTypes, DecimalType, IntegerType import org.apache.comet.CometConf import org.apache.comet.CometConf.COMET_EXEC_STRICT_FLOATING_POINT -import org.apache.comet.CometSparkSessionExtensions.{isSpark41Plus, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{isSpark41Plus, withFallbackReason} import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProto, serializeDataType} import org.apache.comet.shims.CometEvalModeUtil @@ -43,14 +43,14 @@ object CometMin extends CometAggregateExpressionSerde[Min] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.minMaxDataTypeSupported(expr.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${expr.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${expr.dataType}") return None } if (expr.dataType == DataTypes.FloatType || expr.dataType == DataTypes.DoubleType) { if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get()) { // https://github.com/apache/datafusion-comet/issues/2448 - withInfo( + withFallbackReason( aggExpr, s"floating-point not supported when ${COMET_EXEC_STRICT_FLOATING_POINT.key}=true") return None @@ -72,10 +72,10 @@ object CometMin extends CometAggregateExpressionSerde[Min] { .setMin(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${expr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${expr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -92,14 +92,14 @@ object CometMax extends CometAggregateExpressionSerde[Max] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.minMaxDataTypeSupported(expr.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${expr.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${expr.dataType}") return None } if (expr.dataType == DataTypes.FloatType || expr.dataType == DataTypes.DoubleType) { if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get()) { // https://github.com/apache/datafusion-comet/issues/2448 - withInfo( + withFallbackReason( aggExpr, s"floating-point not supported when ${COMET_EXEC_STRICT_FLOATING_POINT.key}=true") return None @@ -121,10 +121,10 @@ object CometMax extends CometAggregateExpressionSerde[Max] { .setMax(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${expr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${expr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -147,7 +147,7 @@ object CometCount extends CometAggregateExpressionSerde[Count] { .setCount(builder) .build()) } else { - withInfo(aggExpr, expr.children: _*) + withFallbackReason(aggExpr, expr.children: _*) None } } @@ -166,7 +166,7 @@ object CometAverage extends CometAggregateExpressionSerde[Average] { conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.avgDataTypeSupported(avg.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${avg.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${avg.dataType}") return None } @@ -198,10 +198,10 @@ object CometAverage extends CometAggregateExpressionSerde[Average] { .setAvg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${avg.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${avg.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -219,7 +219,7 @@ object CometSum extends CometAggregateExpressionSerde[Sum] { conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.sumDataTypeSupported(sum.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${sum.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${sum.dataType}") return None } @@ -241,9 +241,9 @@ object CometSum extends CometAggregateExpressionSerde[Sum] { .build()) } else { if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${sum.dataType} is not supported", sum.child) + withFallbackReason(aggExpr, s"datatype ${sum.dataType} is not supported", sum.child) } else { - withInfo(aggExpr, sum.child) + withFallbackReason(aggExpr, sum.child) } None } @@ -277,10 +277,10 @@ object CometFirst extends CometAggregateExpressionSerde[First] { .setFirst(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${first.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${first.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -313,10 +313,10 @@ object CometLast extends CometAggregateExpressionSerde[Last] { .setLast(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${last.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${last.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -332,7 +332,7 @@ object CometBitAndAgg extends CometAggregateExpressionSerde[BitAndAgg] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.bitwiseAggTypeSupported(bitAnd.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${bitAnd.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${bitAnd.dataType}") return None } val child = bitAnd.child @@ -349,10 +349,10 @@ object CometBitAndAgg extends CometAggregateExpressionSerde[BitAndAgg] { .setBitAndAgg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${bitAnd.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${bitAnd.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -368,7 +368,7 @@ object CometBitOrAgg extends CometAggregateExpressionSerde[BitOrAgg] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.bitwiseAggTypeSupported(bitOr.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${bitOr.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${bitOr.dataType}") return None } val child = bitOr.child @@ -385,10 +385,10 @@ object CometBitOrAgg extends CometAggregateExpressionSerde[BitOrAgg] { .setBitOrAgg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${bitOr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${bitOr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -404,7 +404,7 @@ object CometBitXOrAgg extends CometAggregateExpressionSerde[BitXorAgg] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.bitwiseAggTypeSupported(bitXor.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${bitXor.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${bitXor.dataType}") return None } val child = bitXor.child @@ -421,10 +421,10 @@ object CometBitXOrAgg extends CometAggregateExpressionSerde[BitXorAgg] { .setBitXorAgg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${bitXor.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${bitXor.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -457,7 +457,7 @@ trait CometCovBase { .setCovariance(builder) .build()) } else { - withInfo(aggExpr, "Child expression or data type not supported") + withFallbackReason(aggExpr, "Child expression or data type not supported") None } } @@ -523,7 +523,7 @@ trait CometVariance { .setVariance(builder) .build()) } else { - withInfo(aggExpr, expr.child) + withFallbackReason(aggExpr, expr.child) None } } @@ -578,7 +578,7 @@ trait CometStddev { .setStddev(builder) .build()) } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -630,7 +630,7 @@ object CometCorr extends CometAggregateExpressionSerde[Corr] { .setCorrelation(builder) .build()) } else { - withInfo(aggExpr, corr.x, corr.y) + withFallbackReason(aggExpr, corr.x, corr.y) None } } @@ -698,7 +698,7 @@ object CometBloomFilterAggregate extends CometAggregateExpressionSerde[BloomFilt .setBloomFilterAgg(builder) .build()) } else { - withInfo( + withFallbackReason( aggExpr, bloomFilter.child, bloomFilter.estimatedNumItemsExpression, @@ -751,10 +751,10 @@ object CometCollectSet extends CometAggregateExpressionSerde[CollectSet] { .setCollectSet(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${expr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${expr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala b/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala index 85574fbab7..58e99f9c79 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala @@ -24,9 +24,9 @@ import scala.math.min import org.apache.spark.sql.catalyst.expressions.{Add, Attribute, Cast, Divide, EmptyRow, EqualTo, EvalMode, Expression, If, IntegralDivide, Literal, Multiply, Remainder, Round, Subtract, UnaryMinus} import org.apache.spark.sql.types.{ByteType, DataType, DecimalType, DoubleType, FloatType, IntegerType, LongType, ShortType} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} -import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProtoWithReturnType, serializeDataType} +import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProtoWithReturnType, serializeDataType} import org.apache.comet.shims.CometEvalModeUtil trait MathBase { @@ -61,7 +61,7 @@ trait MathBase { .newBuilder(), inner).build()) } else { - withInfo(expr, left, right) + withFallbackReason(expr, left, right) None } } @@ -92,7 +92,7 @@ object CometAdd extends CometExpressionSerde[Add] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } createMathExpression( @@ -114,7 +114,7 @@ object CometSubtract extends CometExpressionSerde[Subtract] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } createMathExpression( @@ -136,7 +136,7 @@ object CometMultiply extends CometExpressionSerde[Multiply] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } createMathExpression( @@ -163,7 +163,7 @@ object CometDivide extends CometExpressionSerde[Divide] with MathBase { val rightExpr = if (expr.evalMode != EvalMode.ANSI) nullIfWhenPrimitive(expr.right) else expr.right if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } val divideExpr = createMathExpression( @@ -200,7 +200,7 @@ object CometIntegralDivide extends CometExpressionSerde[IntegralDivide] with Mat inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } @@ -264,11 +264,11 @@ object CometRemainder extends CometExpressionSerde[Remainder] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } if (expr.evalMode == EvalMode.TRY) { - withInfo(expr, s"Eval mode ${expr.evalMode} is not supported") + withFallbackReason(expr, s"Eval mode ${expr.evalMode} is not supported") return None } @@ -297,7 +297,7 @@ object CometRound extends CometExpressionSerde[Round] { lazy val childExpr = exprToProtoInternal(r.child, inputs, binding) r.child.dataType match { case t: DecimalType if t.scale < 0 => // Spark disallows negative scale SPARK-30252 - withInfo(r, "Decimal type has negative scale") + withFallbackReason(r, "Decimal type has negative scale") None case _ if scaleV == null => exprToProtoInternal(Literal(null), inputs, binding) @@ -318,7 +318,7 @@ object CometRound extends CometExpressionSerde[Round] { // I.e. 6.13171162472835E18 == 6.1317116247283497E18. However, toString() does not. // That results in round(6.1317116247283497E18, -5) == 6.1317116247282995E18 instead // of 6.1317116247283999E18. - withInfo(r, "Comet does not support Spark's BigDecimal rounding") + withFallbackReason(r, "Comet does not support Spark's BigDecimal rounding") None case _ => // `scale` must be Int64 type in DataFusion @@ -330,7 +330,7 @@ object CometRound extends CometExpressionSerde[Round] { r.ansiEnabled, childExpr, scaleExpr) - optExprWithInfo(optExpr, r, r.child) + optExprWithFallbackReason(optExpr, r, r.child) } } @@ -352,7 +352,7 @@ object CometUnaryMinus extends CometExpressionSerde[UnaryMinus] { .setUnaryMinus(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 33d3216857..b3ea8d7c4f 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde._ import org.apache.comet.shims.{CometExprShim, CometTypeShim} @@ -44,7 +44,7 @@ object CometArrayRemove val inputTypes: Set[DataType] = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } @@ -100,7 +100,7 @@ object CometArrayAppend extends CometExpressionSerde[ArrayAppend] { .setCaseWhen(caseWhenExpr) .build()) } else { - withInfo(expr, expr.children: _*) + withFallbackReason(expr, expr.children: _*) None } } @@ -177,7 +177,7 @@ object CometSortArray extends CometExpressionSerde[SortArray] { exprToProtoInternal(Literal(direction), inputs, binding), exprToProtoInternal(Literal(nullOrdering), inputs, binding)) case other => - withInfo(expr, s"ascendingOrder must be a boolean literal: $other") + withFallbackReason(expr, s"ascendingOrder must be a boolean literal: $other") (None, None) } @@ -187,7 +187,7 @@ object CometSortArray extends CometExpressionSerde[SortArray] { arrayExprProto, sortDirectionExprProto, nullOrderingExprProto) - optExprWithInfo(sortArrayScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(sortArrayScalarExpr, expr, expr.children: _*) } } @@ -221,7 +221,7 @@ object CometArrayIntersect extends CometExpressionSerde[ArrayIntersect] with Com val arraysIntersectScalarExpr = scalarFunctionExprToProto("array_intersect", leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arraysIntersectScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arraysIntersectScalarExpr, expr, expr.children: _*) } } @@ -234,7 +234,7 @@ object CometArrayMax extends CometExpressionSerde[ArrayMax] { val arrayMaxScalarExpr = scalarFunctionExprToProto("array_max", arrayExprProto) - optExprWithInfo(arrayMaxScalarExpr, expr) + optExprWithFallbackReason(arrayMaxScalarExpr, expr) } } @@ -246,7 +246,7 @@ object CometArrayMin extends CometExpressionSerde[ArrayMin] { val arrayExprProto = exprToProto(expr.children.head, inputs, binding) val arrayMinScalarExpr = scalarFunctionExprToProto("array_min", arrayExprProto) - optExprWithInfo(arrayMinScalarExpr, expr) + optExprWithFallbackReason(arrayMinScalarExpr, expr) } } @@ -264,7 +264,7 @@ object CometArraysOverlap extends CometExpressionSerde[ArraysOverlap] { false, leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arraysOverlapScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arraysOverlapScalarExpr, expr, expr.children: _*) } } @@ -328,7 +328,7 @@ object CometArrayCompact extends CometExpressionSerde[Expression] { ArrayType(elementType = elementType), false, arrayExprProto) - optExprWithInfo(arrayCompactScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arrayCompactScalarExpr, expr, expr.children: _*) } } @@ -363,7 +363,7 @@ object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExpr val inputTypes = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } @@ -372,7 +372,7 @@ object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExpr val arrayExceptScalarExpr = scalarFunctionExprToProto("array_except", leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arrayExceptScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arrayExceptScalarExpr, expr, expr.children: _*) } } @@ -402,7 +402,7 @@ object CometArrayJoin extends CometExpressionSerde[ArrayJoin] { delimiterExprProto, nullReplacementExprProto) - optExprWithInfo( + optExprWithFallbackReason( arrayJoinScalarExpr, expr, arrayExpr, @@ -412,7 +412,7 @@ object CometArrayJoin extends CometExpressionSerde[ArrayJoin] { val arrayJoinScalarExpr = scalarFunctionExprToProto("array_to_string", arrayExprProto, delimiterExprProto) - optExprWithInfo(arrayJoinScalarExpr, expr, arrayExpr, arrayExpr.delimiter) + optExprWithFallbackReason(arrayJoinScalarExpr, expr, arrayExpr, arrayExpr.delimiter) } } } @@ -444,7 +444,7 @@ object CometArrayInsert extends CometExpressionSerde[ArrayInsert] { .setArrayInsert(arrayInsertBuilder) .build()) } else { - withInfo( + withFallbackReason( expr, "unsupported arguments for ArrayInsert", expr.children.head, @@ -475,7 +475,7 @@ object CometSlice extends CometExpressionSerde[Slice] { arrayExprProto, startExprProto, lengthExprProto) - optExprWithInfo(sliceScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(sliceScalarExpr, expr, expr.children: _*) } } @@ -489,7 +489,7 @@ object CometArrayUnion extends CometExpressionSerde[ArrayUnion] { val arraysUnionScalarExpr = scalarFunctionExprToProto("array_union", leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arraysUnionScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arraysUnionScalarExpr, expr, expr.children: _*) } } @@ -513,7 +513,7 @@ object CometCreateArray extends CometExpressionSerde[CreateArray] { if (childExprs.forall(_.isDefined)) { scalarFunctionExprToProto("make_array", childExprs: _*) } else { - withInfo(expr, "unsupported arguments for CreateArray", children: _*) + withFallbackReason(expr, "unsupported arguments for CreateArray", children: _*) None } } @@ -542,7 +542,7 @@ object CometGetArrayItem extends CometExpressionSerde[GetArrayItem] { .setListExtract(listExtractBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for GetArrayItem", expr.child, expr.ordinal) + withFallbackReason(expr, "unsupported arguments for GetArrayItem", expr.child, expr.ordinal) None } } @@ -575,12 +575,12 @@ object CometArrayReverse extends CometExpressionSerde[Reverse] with ArraysBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!isTypeSupported(expr.child.dataType)) { - withInfo(expr, s"child data type not supported: ${expr.child.dataType}") + withFallbackReason(expr, s"child data type not supported: ${expr.child.dataType}") return None } val reverseExprProto = exprToProto(expr.child, inputs, binding) val reverseScalarExpr = scalarFunctionExprToProto("array_reverse", reverseExprProto) - optExprWithInfo(reverseScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(reverseScalarExpr, expr, expr.children: _*) } } @@ -599,7 +599,7 @@ object CometElementAt extends CometExpressionSerde[ElementAt] { val defaultExpr = expr.defaultValueOutOfBound.flatMap(exprToProtoInternal(_, inputs, binding)) if (!expr.left.dataType.isInstanceOf[ArrayType]) { - withInfo(expr, "Input is not an array") + withFallbackReason(expr, "Input is not an array") return None } @@ -620,7 +620,7 @@ object CometElementAt extends CometExpressionSerde[ElementAt] { .setListExtract(arrayExtractBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for ElementAt", expr.left, expr.right) + withFallbackReason(expr, "unsupported arguments for ElementAt", expr.left, expr.right) None } } @@ -635,13 +635,13 @@ object CometFlatten extends CometExpressionSerde[Flatten] with ArraysBase { val inputTypes = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } val flattenExprProto = exprToProto(expr.child, inputs, binding) val flattenScalarExpr = scalarFunctionExprToProto("flatten", flattenExprProto) - optExprWithInfo(flattenScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(flattenScalarExpr, expr, expr.children: _*) } } @@ -731,14 +731,14 @@ object CometArrayPosition extends CometExpressionSerde[ArrayPosition] with Array inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (expr.children.forall(_.foldable)) { - withInfo(expr, "all arguments are literals, falling back to Spark") + withFallbackReason(expr, "all arguments are literals, falling back to Spark") return None } // Check if input types are supported val inputTypes: Set[DataType] = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } @@ -750,7 +750,7 @@ object CometArrayPosition extends CometExpressionSerde[ArrayPosition] with Array // (matching Spark's behavior) val optExpr = scalarFunctionExprToProto("spark_array_position", arrayExprProto, elementExprProto) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } } @@ -817,7 +817,10 @@ object CometArraysZip extends CometExpressionSerde[ArraysZip] { .build()) } else { - withInfo(expr, "unsupported arguments for ArraysZip", expr.children ++ expr.names: _*) + withFallbackReason( + expr, + "unsupported arguments for ArraysZip", + expr.children ++ expr.names: _*) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/bitwise.scala b/spark/src/main/scala/org/apache/comet/serde/bitwise.scala index 751fb7521f..7c05dc4349 100644 --- a/spark/src/main/scala/org/apache/comet/serde/bitwise.scala +++ b/spark/src/main/scala/org/apache/comet/serde/bitwise.scala @@ -47,7 +47,7 @@ object CometBitwiseNot extends CometExpressionSerde[BitwiseNot] { val childProto = exprToProto(expr.child, inputs, binding) val bitNotScalarExpr = scalarFunctionExprToProto("bitwise_not", childProto) - optExprWithInfo(bitNotScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(bitNotScalarExpr, expr, expr.children: _*) } } @@ -136,7 +136,7 @@ object CometBitwiseGet extends CometExpressionSerde[BitwiseGet] { val posProto = exprToProto(expr.right, inputs, binding) val bitGetScalarExpr = scalarFunctionExprToProtoWithReturnType("bit_get", ByteType, false, argProto, posProto) - optExprWithInfo(bitGetScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(bitGetScalarExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/conditional.scala b/spark/src/main/scala/org/apache/comet/serde/conditional.scala index 617043524b..cd07730890 100644 --- a/spark/src/main/scala/org/apache/comet/serde/conditional.scala +++ b/spark/src/main/scala/org/apache/comet/serde/conditional.scala @@ -23,7 +23,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.sql.catalyst.expressions.{Attribute, CaseWhen, Coalesce, Expression, If, IsNotNull} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal object CometIf extends CometExpressionSerde[If] { @@ -45,7 +45,7 @@ object CometIf extends CometExpressionSerde[If] { .setIf(builder) .build()) } else { - withInfo(expr, expr.predicate, expr.trueValue, expr.falseValue) + withFallbackReason(expr, expr.predicate, expr.trueValue, expr.falseValue) None } } @@ -76,7 +76,7 @@ object CometCaseWhen extends CometExpressionSerde[CaseWhen] { if (elseValueExpr.isDefined) { builder.setElseExpr(elseValueExpr.get) } else { - withInfo(expr, expr.elseValue.get) + withFallbackReason(expr, expr.elseValue.get) return None } } @@ -86,7 +86,7 @@ object CometCaseWhen extends CometExpressionSerde[CaseWhen] { .setCaseWhen(builder) .build()) } else { - withInfo(expr, allBranches: _*) + withFallbackReason(expr, allBranches: _*) None } } @@ -116,7 +116,7 @@ object CometCoalesce extends CometExpressionSerde[Coalesce] { if (elseValueExpr.isDefined) { builder.setElseExpr(elseValueExpr.get) } else { - withInfo(expr, elseValue) + withFallbackReason(expr, elseValue) return None } Some( @@ -125,7 +125,7 @@ object CometCoalesce extends CometExpressionSerde[Coalesce] { .setCaseWhen(builder) .build()) } else { - withInfo(expr, branches.map(_._2): _*) + withFallbackReason(expr, branches.map(_._2): _*) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala b/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala index 80a2a39ef4..5ffcae2b2d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala +++ b/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala @@ -22,8 +22,8 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, KnownFloatingPointNormalized} import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero -import org.apache.comet.CometSparkSessionExtensions.withInfo -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, serializeDataType} +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, serializeDataType} object CometKnownFloatingPointNormalized extends CometExpressionSerde[KnownFloatingPointNormalized] { @@ -50,7 +50,7 @@ object CometKnownFloatingPointNormalized val dataType = serializeDataType(wrapped.dataType) if (dataType.isEmpty) { - withInfo(wrapped, s"Unsupported datatype ${wrapped.dataType}") + withFallbackReason(wrapped, s"Unsupported datatype ${wrapped.dataType}") return None } val ex = exprToProtoInternal(wrapped, inputs, binding) @@ -61,6 +61,6 @@ object CometKnownFloatingPointNormalized .setDatatype(dataType.get) ExprOuterClass.Expr.newBuilder().setNormalizeNanAndZero(builder).build() } - optExprWithInfo(optExpr, expr, wrapped) + optExprWithFallbackReason(optExpr, expr, wrapped) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala index 846d093915..9afb4cc07e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, import org.apache.spark.unsafe.types.UTF8String import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.CometGetDateField.CometGetDateField import org.apache.comet.serde.ExprOuterClass.Expr @@ -77,7 +77,7 @@ trait CometExprGetDateField[T <: GetDateField] { .build()) .build() }) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -135,7 +135,7 @@ object CometDayOfWeek .build() } .headOption - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -216,7 +216,7 @@ object CometHour extends CometExpressionSerde[Hour] { .setHour(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -258,7 +258,7 @@ object CometMinute extends CometExpressionSerde[Minute] { .setMinute(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -300,7 +300,7 @@ object CometSecond extends CometExpressionSerde[Second] { .setSecond(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -336,7 +336,7 @@ object CometUnixTimestamp extends CometExpressionSerde[UnixTimestamp] { binding: Boolean): Option[ExprOuterClass.Expr] = { if (!isSupportedInputType(expr)) { val inputType = expr.children.head.dataType - withInfo(expr, s"unix_timestamp does not support input type: $inputType") + withFallbackReason(expr, s"unix_timestamp does not support input type: $inputType") return None } @@ -355,7 +355,7 @@ object CometUnixTimestamp extends CometExpressionSerde[UnixTimestamp] { .setUnixTimestamp(builder) .build()) } else { - withInfo(expr, expr.children.head) + withFallbackReason(expr, expr.children.head) None } } @@ -388,7 +388,7 @@ object CometFromUTCTimestamp extends CometExpressionSerde[FromUTCTimestamp] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("from_utc_timestamp", childExprs: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } @@ -406,7 +406,7 @@ object CometToUTCTimestamp extends CometExpressionSerde[ToUTCTimestamp] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("to_utc_timestamp", childExprs: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } @@ -427,7 +427,7 @@ object CometConvertTimezone extends CometExpressionSerde[ConvertTimezone] { val ts = exprToProtoInternal(expr.sourceTs, inputs, binding) val toUtc = scalarFunctionExprToProto("to_utc_timestamp", ts, srcTz) val fromUtc = scalarFunctionExprToProto("from_utc_timestamp", toUtc, tgtTz) - optExprWithInfo(fromUtc, expr, expr.children: _*) + optExprWithFallbackReason(fromUtc, expr, expr.children: _*) } } @@ -473,7 +473,7 @@ object CometUnixDate extends CometExpressionSerde[UnixDate] { .build()) .build() } - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -515,7 +515,7 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] { false, childExpr, formatExpr) - optExprWithInfo(optExpr, expr, expr.date, expr.format) + optExprWithFallbackReason(optExpr, expr, expr.date, expr.format) } } @@ -587,7 +587,7 @@ object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] { .setTruncTimestamp(builder) .build()) } else { - withInfo(expr, expr.timestamp, expr.format) + withFallbackReason(expr, expr.timestamp, expr.format) None } } @@ -645,8 +645,8 @@ object CometDateFormat extends CometExpressionSerde[DateFormatClass] { "yyyy-MM-dd'T'HH:mm:ss" -> "%Y-%m-%dT%H:%M:%S") // Compatibility is decided inside `convert`: the native path covers a subset, and the codegen - // dispatcher covers everything else when enabled. Plan-time tagging happens via `withInfo` on - // the path that returns None. + // dispatcher covers everything else when enabled. Plan-time tagging happens via + // `withFallbackReason` on the path that returns None. override def getSupportLevel(expr: DateFormatClass): SupportLevel = Compatible() override def getCompatibleNotes(): Seq[String] = Seq( @@ -682,7 +682,7 @@ object CometDateFormat extends CometExpressionSerde[DateFormatClass] { false, childExpr, formatExpr) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } else { // Hand the full `DateFormatClass` (with `timeZoneId` already stamped by `ResolveTimeZone`) // to the codegen dispatcher. It closure-serializes the bound tree, so non-UTC timezones @@ -716,10 +716,10 @@ object CometHours extends CometExpressionSerde[Hours] { .build() } case other => - withInfo(expr, s"Hours does not support input type: $other") + withFallbackReason(expr, s"Hours does not support input type: $other") None } - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -749,7 +749,7 @@ object CometDays extends CometExpressionSerde[Days] { CometCast.castToProto(expr, Some(timezone), DateType, child, CometEvalMode.LEGACY) } case other => - withInfo(expr, s"Days does not support input type: $other") + withFallbackReason(expr, s"Days does not support input type: $other") None } @@ -768,7 +768,7 @@ object CometDays extends CometExpressionSerde[Days] { .build() } - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala b/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala index 52adf950c1..f41e0fafaa 100644 --- a/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala +++ b/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, MakeDecimal, UnscaledValue} import org.apache.spark.sql.types.{DecimalType, LongType} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProtoWithReturnType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProtoWithReturnType} object CometUnscaledValue extends CometExpressionSerde[UnscaledValue] { override def convert( @@ -32,7 +32,7 @@ object CometUnscaledValue extends CometExpressionSerde[UnscaledValue] { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("unscaled_value", LongType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -58,7 +58,7 @@ object CometMakeDecimal extends CometExpressionSerde[MakeDecimal] { DecimalType(expr.precision, expr.scale), failOnError = !expr.nullOnOverflow, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/hash.scala b/spark/src/main/scala/org/apache/comet/serde/hash.scala index a58e81b02d..baf6716ae4 100644 --- a/spark/src/main/scala/org/apache/comet/serde/hash.scala +++ b/spark/src/main/scala/org/apache/comet/serde/hash.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, Murmur3Hash, Sha1, Sha2, XxHash64} import org.apache.spark.sql.types.{ArrayType, DataType, DecimalType, IntegerType, LongType, MapType, StringType, StructType} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, isTimeType, scalarFunctionExprToProtoWithReturnType, serializeDataType, supportedDataType} object CometXxHash64 extends CometExpressionSerde[XxHash64] { @@ -79,7 +79,7 @@ object CometSha2 extends CometExpressionSerde[Sha2] { // It's possible for spark to dynamically compute the number of bits from input // expression, however DataFusion does not support that yet. if (!expr.right.foldable) { - withInfo(expr, "For Sha2, non literal numBits is not supported") + withFallbackReason(expr, "For Sha2, non literal numBits is not supported") return None } @@ -95,7 +95,7 @@ object CometSha1 extends CometExpressionSerde[Sha1] { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!HashUtils.isSupportedType(expr)) { - withInfo(expr, s"HashUtils doesn't support dataType: ${expr.child.dataType}") + withFallbackReason(expr, s"HashUtils doesn't support dataType: ${expr.child.dataType}") return None } val childExpr = exprToProtoInternal(expr.child, inputs, binding) @@ -118,7 +118,7 @@ private object HashUtils { case d: DecimalType if d.precision > 18 => // Spark converts decimals with precision > 18 into // Java BigDecimal before hashing - withInfo(expr, s"Unsupported datatype: $dt (precision > 18)") + withFallbackReason(expr, s"Unsupported datatype: $dt (precision > 18)") false case s: StructType => s.fields.forall(f => isSupportedDataType(expr, f.dataType)) @@ -127,10 +127,10 @@ private object HashUtils { case m: MapType => isSupportedDataType(expr, m.keyType) && isSupportedDataType(expr, m.valueType) case dt if isTimeType(dt) => - withInfo(expr, s"Unsupported datatype $dt") + withFallbackReason(expr, s"Unsupported datatype $dt") false case _ if !supportedDataType(dt, allowComplex = true) => - withInfo(expr, s"Unsupported datatype $dt") + withFallbackReason(expr, s"Unsupported datatype $dt") false case _ => true diff --git a/spark/src/main/scala/org/apache/comet/serde/literals.scala b/spark/src/main/scala/org/apache/comet/serde/literals.scala index 5b03985c09..4f2a5dfa5e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/literals.scala +++ b/spark/src/main/scala/org/apache/comet/serde/literals.scala @@ -29,7 +29,7 @@ import org.apache.spark.unsafe.types.UTF8String import com.google.protobuf.ByteString -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.DataTypeSupport.isComplexType import org.apache.comet.serde.{CometExpressionSerde, Compatible, ExprOuterClass, LiteralOuterClass, SupportLevel, Unsupported} import org.apache.comet.serde.QueryPlanSerde.{isTimeType, serializeDataType, supportedDataType} @@ -101,7 +101,7 @@ object CometLiteral extends CometExpressionSerde[Literal] with Logging { exprBuilder.setListVal(listLiteralBuilder.build()) exprBuilder.setDatatype(serializeDataType(dataType).get) case dt => - withInfo(expr, s"Unexpected datatype '$dt' for literal value '$value'") + withFallbackReason(expr, s"Unexpected datatype '$dt' for literal value '$value'") return None } } @@ -117,7 +117,7 @@ object CometLiteral extends CometExpressionSerde[Literal] with Logging { .setLiteral(exprBuilder) .build()) } else { - withInfo(expr, s"Unsupported datatype $dataType") + withFallbackReason(expr, s"Unsupported datatype $dataType") None } diff --git a/spark/src/main/scala/org/apache/comet/serde/maps.scala b/spark/src/main/scala/org/apache/comet/serde/maps.scala index 01c100b5e7..abecbaa16d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/maps.scala +++ b/spark/src/main/scala/org/apache/comet/serde/maps.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ -import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} object CometMapKeys extends CometExpressionSerde[MapKeys] { @@ -32,7 +32,7 @@ object CometMapKeys extends CometExpressionSerde[MapKeys] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val mapKeysScalarExpr = scalarFunctionExprToProto("map_keys", childExpr) - optExprWithInfo(mapKeysScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapKeysScalarExpr, expr, expr.children: _*) } } @@ -44,7 +44,7 @@ object CometMapEntries extends CometExpressionSerde[MapEntries] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val mapEntriesScalarExpr = scalarFunctionExprToProto("map_entries", childExpr) - optExprWithInfo(mapEntriesScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapEntriesScalarExpr, expr, expr.children: _*) } } @@ -56,7 +56,7 @@ object CometMapValues extends CometExpressionSerde[MapValues] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val mapValuesScalarExpr = scalarFunctionExprToProto("map_values", childExpr) - optExprWithInfo(mapValuesScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapValuesScalarExpr, expr, expr.children: _*) } } @@ -69,7 +69,7 @@ object CometMapExtract extends CometExpressionSerde[GetMapValue] { val mapExpr = exprToProtoInternal(expr.child, inputs, binding) val keyExpr = exprToProtoInternal(expr.key, inputs, binding) val mapExtractExpr = scalarFunctionExprToProto("map_extract", mapExpr, keyExpr) - optExprWithInfo(mapExtractExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapExtractExpr, expr, expr.children: _*) } } @@ -129,7 +129,7 @@ object CometMapContainsKey extends CometExpressionSerde[MapContainsKey] { val mapKeysExpr = scalarFunctionExprToProto("map_keys", mapExpr) val mapContainsKeyExpr = scalarFunctionExprToProto("array_has", mapKeysExpr, keyExpr) - optExprWithInfo(mapContainsKeyExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapContainsKeyExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index 401d14cc78..3be98604c3 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -22,8 +22,8 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Abs, Add, Atan2, Attribute, Ceil, CheckOverflow, Expression, Floor, Hex, If, LessThanOrEqual, Literal, Log, Log10, Log2, Logarithm, Unhex} import org.apache.spark.sql.types.{DecimalType, DoubleType, NumericType} -import org.apache.comet.CometSparkSessionExtensions.withInfo -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType} +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType} object CometAtan2 extends CometExpressionSerde[Atan2] { override def convert( @@ -36,7 +36,7 @@ object CometAtan2 extends CometExpressionSerde[Atan2] { val leftExpr = exprToProtoInternal(left, inputs, binding) val rightExpr = exprToProtoInternal(right, inputs, binding) val optExpr = scalarFunctionExprToProto("atan2", leftExpr, rightExpr) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } } @@ -50,12 +50,12 @@ object CometCeil extends CometExpressionSerde[Ceil] { case t: DecimalType if t.scale == 0 => // zero scale is no-op childExpr case t: DecimalType if t.scale < 0 => // Spark disallows negative scale SPARK-30252 - withInfo(expr, s"Decimal type $t has negative scale") + withFallbackReason(expr, s"Decimal type $t has negative scale") None case _ => val optExpr = scalarFunctionExprToProtoWithReturnType("ceil", expr.dataType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } } @@ -70,12 +70,12 @@ object CometFloor extends CometExpressionSerde[Floor] { case t: DecimalType if t.scale == 0 => // zero scale is no-op childExpr case t: DecimalType if t.scale < 0 => // Spark disallows negative scale SPARK-30252 - withInfo(expr, s"Decimal type $t has negative scale") + withFallbackReason(expr, s"Decimal type $t has negative scale") None case _ => val optExpr = scalarFunctionExprToProtoWithReturnType("floor", expr.dataType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } } @@ -90,7 +90,7 @@ object CometLog extends CometExpressionSerde[Log] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(nullIfNegative(expr.child), inputs, binding) val optExpr = scalarFunctionExprToProto("ln", childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -101,7 +101,7 @@ object CometLog10 extends CometExpressionSerde[Log10] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(nullIfNegative(expr.child), inputs, binding) val optExpr = scalarFunctionExprToProto("log10", childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -112,7 +112,7 @@ object CometLog2 extends CometExpressionSerde[Log2] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(nullIfNegative(expr.child), inputs, binding) val optExpr = scalarFunctionExprToProto("log2", childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -128,7 +128,7 @@ object CometLogarithm extends CometExpressionSerde[Logarithm] { val rightExpr = exprToProtoInternal(expr.right, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("spark_log", DoubleType, false, leftExpr, rightExpr) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } } @@ -139,7 +139,7 @@ object CometHex extends CometExpressionSerde[Hex] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("hex", expr.dataType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -158,7 +158,7 @@ object CometUnhex extends CometExpressionSerde[Unhex] with MathExprBase { false, childExpr, failOnErrorExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -192,7 +192,7 @@ object CometAbs extends CometExpressionSerde[Abs] with MathExprBase { false, childExpr, failOnErrorExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -237,7 +237,7 @@ object CometCheckOverflow extends CometExpressionSerde[CheckOverflow] { .setCheckOverflow(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala b/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala index aba52d3624..b778f2ea87 100644 --- a/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala +++ b/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, BindReferences, BoundReference} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, serializeDataType} object CometAlias extends CometExpressionSerde[Alias] { @@ -31,7 +31,7 @@ object CometAlias extends CometExpressionSerde[Alias] { binding: Boolean): Option[ExprOuterClass.Expr] = { val r = exprToProtoInternal(a.child, inputs, binding) if (r.isEmpty) { - withInfo(a, a.child) + withFallbackReason(a, a.child) } r } @@ -53,7 +53,7 @@ object CometAttributeReference extends CometExpressionSerde[AttributeReference] .bindReference(attr, inputs, allowFailures = true) if (boundRef.isInstanceOf[AttributeReference]) { - withInfo(attr, s"cannot resolve $attr among ${inputs.mkString(", ")}") + withFallbackReason(attr, s"cannot resolve $attr among ${inputs.mkString(", ")}") return None } @@ -82,7 +82,7 @@ object CometAttributeReference extends CometExpressionSerde[AttributeReference] .build()) } } else { - withInfo(attr, s"unsupported datatype: ${attr.dataType}") + withFallbackReason(attr, s"unsupported datatype: ${attr.dataType}") None } diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala index 69b9bd5f85..60fb65277e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.internal.SQLConf import org.apache.comet.{CometConf, ConfigEntry} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.objectstore.NativeConfig import org.apache.comet.serde.{CometOperatorSerde, Incompatible, OperatorOuterClass, SupportLevel, Unsupported} import org.apache.comet.serde.OperatorOuterClass.Operator @@ -104,7 +104,7 @@ object CometDataWritingCommand extends CometOperatorSerde[DataWritingCommandExec } if (scanTypes.length != cmd.query.output.length) { - withInfo(op, "Cannot serialize data types for native write") + withFallbackReason(op, "Cannot serialize data types for native write") return None } @@ -124,7 +124,7 @@ object CometDataWritingCommand extends CometOperatorSerde[DataWritingCommandExec case "zstd" => OperatorOuterClass.CompressionCodec.Zstd case "none" => OperatorOuterClass.CompressionCodec.None case other => - withInfo(op, s"Unsupported compression codec: $other") + withFallbackReason(op, s"Unsupported compression codec: $other") return None } @@ -157,7 +157,7 @@ object CometDataWritingCommand extends CometOperatorSerde[DataWritingCommandExec Some(writerOperator) } catch { case e: Exception => - withInfo( + withFallbackReason( op, "Failed to convert DataWritingCommandExec to native execution: " + s"${e.getMessage}") diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala index 76fefa15c4..8662f5774e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.comet.{CometConf, ConfigEntry} import org.apache.comet.CometConf.COMET_EXEC_ENABLED -import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, isSpark35Plus, isSpark41Plus, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{hasFallbackReason, isSpark35Plus, isSpark41Plus, withFallbackReason} import org.apache.comet.objectstore.NativeConfig import org.apache.comet.parquet.CometParquetUtils import org.apache.comet.serde.{CometOperatorSerde, Compatible, OperatorOuterClass, SupportLevel} @@ -48,13 +48,15 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { /** Determine whether the scan is supported and tag the Spark plan with any fallback reasons */ def isSupported(scanExec: FileSourceScanExec): Boolean = { - if (hasExplainInfo(scanExec)) { + if (hasFallbackReason(scanExec)) { // this node has already been tagged with fallback reasons return false } if (!COMET_EXEC_ENABLED.get()) { - withInfo(scanExec, s"Full native scan disabled because ${COMET_EXEC_ENABLED.key} disabled") + withFallbackReason( + scanExec, + s"Full native scan disabled because ${COMET_EXEC_ENABLED.key} disabled") } // AQE DPP (SubqueryAdaptiveBroadcastExec) is converted to CometSubqueryBroadcastExec @@ -67,14 +69,14 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { // rule can't run. CometScanRule.transformV1Scan rejects AQE DPP on 3.4, so this check // is a safety net: if the scan somehow reached here with AQE DPP on 3.4, reject it. if (!isSpark35Plus && scanExec.partitionFilters.exists(isAqeDynamicPruningFilter)) { - withInfo(scanExec, "Native DataFusion scan does not support AQE DPP on Spark 3.4") + withFallbackReason(scanExec, "Native DataFusion scan does not support AQE DPP on Spark 3.4") } if (SQLConf.get.ignoreCorruptFiles || scanExec.relation.options .get("ignorecorruptfiles") // Spark sets this to lowercase. .contains("true")) { - withInfo(scanExec, "Full native scan disabled because ignoreCorruptFiles enabled") + withFallbackReason(scanExec, "Full native scan disabled because ignoreCorruptFiles enabled") } if (SQLConf.get.ignoreMissingFiles || @@ -82,11 +84,11 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { .get("ignoremissingfiles") // Spark sets this to lowercase. .contains("true")) { - withInfo(scanExec, "Full native scan disabled because ignoreMissingFiles enabled") + withFallbackReason(scanExec, "Full native scan disabled because ignoreMissingFiles enabled") } // the scan is supported if no fallback reasons were added to the node - !hasExplainInfo(scanExec) + !hasFallbackReason(scanExec) } /** Detects AQE DPP (SubqueryAdaptiveBroadcastExec), as opposed to non-AQE DPP. */ @@ -234,7 +236,7 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { } else { // There are unsupported scan type - withInfo( + withFallbackReason( scan, s"unsupported Comet operator: ${scan.nodeName}, due to unsupported data types above") None diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala index 845803d133..ff11b5d23b 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.ConfigEntry import org.apache.comet.serde.{CometOperatorSerde, OperatorOuterClass} import org.apache.comet.serde.OperatorOuterClass.Operator @@ -53,7 +53,7 @@ abstract class CometSink[T <: SparkPlan] extends CometOperatorSerde[T] { op.output.forall(a => supportedDataType(a.dataType, allowComplex = true)) if (!supportedTypes) { - withInfo(op, "Unsupported data type") + withFallbackReason(op, "Unsupported data type") return None } @@ -80,7 +80,7 @@ abstract class CometSink[T <: SparkPlan] extends CometOperatorSerde[T] { Some(builder.setScan(scanBuilder).build()) } else { // There are unsupported scan type - withInfo( + withFallbackReason( op, s"unsupported Comet operator: ${op.nodeName}, due to unsupported data types above") None @@ -123,7 +123,7 @@ object CometExchangeSink extends CometSink[SparkPlan] { op.output.forall(a => supportedDataType(a.dataType, allowComplex = true)) if (!supportedTypes) { - withInfo(op, "Unsupported data type for shuffle direct read") + withFallbackReason(op, "Unsupported data type for shuffle direct read") return None } @@ -144,7 +144,7 @@ object CometExchangeSink extends CometSink[SparkPlan] { builder.clearChildren() Some(builder.setShuffleScan(scanBuilder).build()) } else { - withInfo(op, s"unsupported data types in ${op.nodeName} for shuffle direct read") + withFallbackReason(op, s"unsupported data types in ${op.nodeName} for shuffle direct read") None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/predicates.scala b/spark/src/main/scala/org/apache/comet/serde/predicates.scala index 2e253bc239..7abe40823e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/predicates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/predicates.scala @@ -24,7 +24,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.sql.catalyst.expressions.{And, Attribute, EqualNullSafe, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, In, InSet, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not, Or} import org.apache.spark.sql.types.BooleanType -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.QueryPlanSerde._ @@ -220,7 +220,7 @@ object CometIsNaN extends CometExpressionSerde[IsNaN] { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("isnan", BooleanType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -271,7 +271,7 @@ object ComparisonUtils { .build()) } else { val allExprs = list ++ Seq(value) - withInfo(expr, allExprs: _*) + withFallbackReason(expr, allExprs: _*) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/statics.scala b/spark/src/main/scala/org/apache/comet/serde/statics.scala index bff64e753a..b2a4b991d1 100644 --- a/spark/src/main/scala/org/apache/comet/serde/statics.scala +++ b/spark/src/main/scala/org/apache/comet/serde/statics.scala @@ -23,8 +23,8 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionImplUtils import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils -import org.apache.comet.CometSparkSessionExtensions.withInfo -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} object CometStaticInvoke extends CometExpressionSerde[StaticInvoke] { @@ -48,7 +48,7 @@ object CometStaticInvoke extends CometExpressionSerde[StaticInvoke] { case Some(handler) => handler.convert(expr, inputs, binding) case None => - withInfo( + withFallbackReason( expr, s"Static invoke expression: ${expr.functionName} is not supported", expr.children: _*) @@ -64,7 +64,7 @@ object CometUrlEncodeStaticInvoke extends CometExpressionSerde[StaticInvoke] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.children.head, inputs, binding) val optExpr = scalarFunctionExprToProto("url_encode", childExpr) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } @@ -80,6 +80,6 @@ object CometUrlDecodeStaticInvoke extends CometExpressionSerde[StaticInvoke] { val funcName = if (failOnError) "url_decode" else "try_url_decode" val childExpr = exprToProtoInternal(expr.children.head, inputs, binding) val optExpr = scalarFunctionExprToProto(funcName, childExpr) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index e628e3742b..f2f10d5f1c 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -26,10 +26,10 @@ import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType} import org.apache.spark.unsafe.types.UTF8String import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode, RegExp} import org.apache.comet.serde.ExprOuterClass.Expr -import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType} +import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType} object CometStringRepeat extends CometExpressionSerde[StringRepeat] { @@ -47,7 +47,7 @@ object CometStringRepeat extends CometExpressionSerde[StringRepeat] { val leftExpr = exprToProtoInternal(leftCast, inputs, binding) val rightExpr = exprToProtoInternal(rightCast, inputs, binding) val optExpr = scalarFunctionExprToProto("repeat", leftExpr, rightExpr) - optExprWithInfo(optExpr, expr, leftCast, rightCast) + optExprWithFallbackReason(optExpr, expr, leftCast, rightCast) } } @@ -118,11 +118,11 @@ object CometSubstring extends CometExpressionSerde[Substring] { builder.setLen(len.asInstanceOf[Int]) Some(ExprOuterClass.Expr.newBuilder().setSubstring(builder).build()) case None => - withInfo(expr, expr.str) + withFallbackReason(expr, expr.str) None } case _ => - withInfo(expr, "Substring pos and len must be literals") + withFallbackReason(expr, "Substring pos and len must be literals") None } } @@ -140,7 +140,7 @@ object CometSubstringIndex extends CometExpressionSerde[SubstringIndex] { val countExpr = exprToProtoInternal(countCast, inputs, binding) val optExpr = scalarFunctionExprToProto("substring_index", strExpr, delimExpr, countExpr) - optExprWithInfo(optExpr, expr, expr.strExpr, expr.delimExpr, expr.countExpr) + optExprWithFallbackReason(optExpr, expr, expr.strExpr, expr.delimExpr, expr.countExpr) } } @@ -161,11 +161,11 @@ object CometLeft extends CometExpressionSerde[Left] { builder.setLen(lenValue.asInstanceOf[Int]) Some(ExprOuterClass.Expr.newBuilder().setSubstring(builder).build()) case None => - withInfo(expr, expr.str) + withFallbackReason(expr, expr.str) None } case _ => - withInfo(expr, "LEFT len must be a literal") + withFallbackReason(expr, "LEFT len must be a literal") None } } @@ -203,12 +203,12 @@ object CometRight extends CometExpressionSerde[Right] { builder.setLen(lenInt) Some(ExprOuterClass.Expr.newBuilder().setSubstring(builder).build()) case None => - withInfo(expr, expr.str) + withFallbackReason(expr, expr.str) None } } case _ => - withInfo(expr, "RIGHT len must be a literal") + withFallbackReason(expr, "RIGHT len must be a literal") None } } @@ -248,7 +248,7 @@ object CometConcatWs extends CometExpressionSerde[ConcatWs] { case _ if expr.children.forall(_.foldable) => // Fall back to Spark for all-literal args so ConstantFolding can handle it - withInfo(expr, "all arguments are foldable") + withFallbackReason(expr, "all arguments are foldable") None case _ => @@ -270,7 +270,9 @@ object CometLike extends CometExpressionSerde[Like] { binding, (builder, binaryExpr) => builder.setLike(binaryExpr)) } else { - withInfo(expr, s"custom escape character ${expr.escapeChar} not supported in LIKE") + withFallbackReason( + expr, + s"custom escape character ${expr.escapeChar} not supported in LIKE") None } } @@ -286,7 +288,7 @@ object CometRLike extends CometExpressionSerde[RLike] { case Literal(pattern, DataTypes.StringType) => if (!RegExp.isSupportedPattern(pattern.toString) && !CometConf.isExprAllowIncompat("regexp")) { - withInfo( + withFallbackReason( expr, s"Regexp pattern $pattern is not compatible with Spark. " + s"Set ${CometConf.getExprAllowIncompatConfigKey("regexp")}=true " + @@ -302,7 +304,7 @@ object CometRLike extends CometExpressionSerde[RLike] { (builder, binaryExpr) => builder.setRlike(binaryExpr)) } case _ => - withInfo(expr, "Only scalar regexp patterns are supported") + withFallbackReason(expr, "Only scalar regexp patterns are supported") None } } @@ -375,7 +377,7 @@ object CometRegExpReplace extends CometExpressionSerde[RegExpReplace] { override def getSupportLevel(expr: RegExpReplace): SupportLevel = { if (!RegExp.isSupportedPattern(expr.regexp.toString) && !CometConf.isExprAllowIncompat("regexp")) { - withInfo( + withFallbackReason( expr, s"Regexp pattern ${expr.regexp} is not compatible with Spark. " + s"Set ${CometConf.getExprAllowIncompatConfigKey("regexp")}=true " + @@ -405,7 +407,7 @@ object CometRegExpReplace extends CometExpressionSerde[RegExpReplace] { patternExpr, replacementExpr, flagsExpr) - optExprWithInfo(optExpr, expr, expr.subject, expr.regexp, expr.rep, expr.pos) + optExprWithFallbackReason(optExpr, expr, expr.subject, expr.regexp, expr.rep, expr.pos) } } @@ -436,7 +438,7 @@ object CometStringSplit extends CometExpressionSerde[StringSplit] { strExpr, regexExpr, limitExpr) - optExprWithInfo(optExpr, expr, expr.str, expr.regex, expr.limit) + optExprWithFallbackReason(optExpr, expr, expr.str, expr.regex, expr.limit) } } @@ -463,7 +465,7 @@ object CometGetJsonObject extends CometExpressionSerde[GetJsonObject] { false, jsonExpr, pathExpr) - optExprWithInfo(optExpr, expr, expr.json, expr.path) + optExprWithFallbackReason(optExpr, expr, expr.json, expr.path) } } @@ -485,11 +487,11 @@ trait CommonStringExprs { if (binExpr.isDefined) { CometCast.castToProto(expr, None, DataTypes.StringType, binExpr.get, CometEvalMode.TRY) } else { - withInfo(expr, bin) + withFallbackReason(expr, bin) None } case _ => - withInfo(expr, "Comet only supports decoding with 'utf-8'.") + withFallbackReason(expr, "Comet only supports decoding with 'utf-8'.") None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/structs.scala b/spark/src/main/scala/org/apache/comet/serde/structs.scala index 3f0f184263..3c7dc17844 100644 --- a/spark/src/main/scala/org/apache/comet/serde/structs.scala +++ b/spark/src/main/scala/org/apache/comet/serde/structs.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, CreateNamedStruct, import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.DataTypeSupport import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, serializeDataType} @@ -62,7 +62,7 @@ object CometCreateNamedStruct extends CometExpressionSerde[CreateNamedStruct] { .setCreateNamedStruct(structBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for CreateNamedStruct", expr.valExprs: _*) + withFallbackReason(expr, "unsupported arguments for CreateNamedStruct", expr.valExprs: _*) None } @@ -107,7 +107,7 @@ object CometGetArrayStructFields extends CometExpressionSerde[GetArrayStructFiel .setGetArrayStructFields(arrayStructFieldsBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for GetArrayStructFields", expr.child) + withFallbackReason(expr, "unsupported arguments for GetArrayStructFields", expr.child) None } } @@ -145,7 +145,7 @@ object CometStructsToJson extends CometExpressionSerde[StructsToJson] { .setToJson(toJson) .build()) case _ => - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -188,7 +188,7 @@ object CometJsonToStructs extends CometExpressionSerde[JsonToStructs] { binding: Boolean): Option[ExprOuterClass.Expr] = { if (expr.schema == null) { - withInfo(expr, "from_json requires explicit schema") + withFallbackReason(expr, "from_json requires explicit schema") return None } @@ -205,7 +205,7 @@ object CometJsonToStructs extends CometExpressionSerde[JsonToStructs] { val schemaType = expr.schema if (!isSupportedType(schemaType)) { - withInfo(expr, "from_json: Unsupported schema type") + withFallbackReason(expr, "from_json: Unsupported schema type") return None } @@ -213,13 +213,15 @@ object CometJsonToStructs extends CometExpressionSerde[JsonToStructs] { if (options.nonEmpty) { val mode = options.getOrElse("mode", "PERMISSIVE") if (mode != "PERMISSIVE") { - withInfo(expr, s"from_json: Only PERMISSIVE mode supported, got: $mode") + withFallbackReason(expr, s"from_json: Only PERMISSIVE mode supported, got: $mode") return None } val knownOptions = Set("mode") val unknownOpts = options.keySet -- knownOptions if (unknownOpts.nonEmpty) { - withInfo(expr, s"from_json: Ignoring unsupported options: ${unknownOpts.mkString(", ")}") + withFallbackReason( + expr, + s"from_json: Ignoring unsupported options: ${unknownOpts.mkString(", ")}") } } diff --git a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala index e5eeb5b848..07c748219a 100644 --- a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala @@ -22,8 +22,8 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, FromUnixTime, Literal} import org.apache.spark.sql.catalyst.util.TimestampFormatter -import org.apache.comet.CometSparkSessionExtensions.withInfo -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} // TODO: DataFusion supports only -8334601211038 <= sec <= 8210266876799 // https://github.com/apache/datafusion/issues/16594 @@ -49,15 +49,15 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] { val timeZone = exprToProtoInternal(Literal(expr.timeZoneId.orNull), inputs, binding) if (expr.format != Literal(TimestampFormatter.defaultPattern)) { - withInfo(expr, "Datetime pattern format is unsupported") + withFallbackReason(expr, "Datetime pattern format is unsupported") None } else if (secExpr.isDefined && formatExpr.isDefined) { val timestampExpr = scalarFunctionExprToProto("from_unixtime", Seq(secExpr, timeZone): _*) val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*) - optExprWithInfo(optExpr, expr, expr.sec, expr.format) + optExprWithFallbackReason(optExpr, expr, expr.sec, expr.format) } else { - withInfo(expr, expr.sec, expr.format) + withFallbackReason(expr, expr.sec, expr.format) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/url.scala b/spark/src/main/scala/org/apache/comet/serde/url.scala index b672cc17bb..b54d78ba25 100644 --- a/spark/src/main/scala/org/apache/comet/serde/url.scala +++ b/spark/src/main/scala/org/apache/comet/serde/url.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, ParseUrl} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} object CometParseUrl extends CometExpressionSerde[ParseUrl] { @@ -32,6 +32,6 @@ object CometParseUrl extends CometExpressionSerde[ParseUrl] { val funcName = if (expr.failOnError) "parse_url" else "try_parse_url" val childExprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto(funcName, childExprs: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala index e642bafa4f..18c5c1e2b8 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.DecimalType import com.google.common.base.Objects import org.apache.comet.{CometConf, ConfigEntry} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.{AggSerde, CometOperatorSerde, Incompatible, OperatorOuterClass, SupportLevel} import org.apache.comet.serde.OperatorOuterClass.Operator import org.apache.comet.serde.QueryPlanSerde.{aggExprToProto, exprToProto, scalarFunctionExprToProto} @@ -68,7 +68,7 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { }.toArray if (winExprs.length != op.windowExpression.length) { - withInfo(op, "Unsupported window expression(s)") + withFallbackReason(op, "Unsupported window expression(s)") return None } @@ -115,14 +115,14 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { if (AggSerde.minMaxDataTypeSupported(min.dataType)) { Some(agg) } else { - withInfo(windowExpr, s"datatype ${min.dataType} is not supported", expr) + withFallbackReason(windowExpr, s"datatype ${min.dataType} is not supported", expr) None } case max: Max => if (AggSerde.minMaxDataTypeSupported(max.dataType)) { Some(agg) } else { - withInfo(windowExpr, s"datatype ${max.dataType} is not supported", expr) + withFallbackReason(windowExpr, s"datatype ${max.dataType} is not supported", expr) None } case s: Sum => @@ -130,11 +130,11 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { .isInstanceOf[DecimalType]) { Some(agg) } else { - withInfo(windowExpr, s"datatype ${s.dataType} is not supported", expr) + withFallbackReason(windowExpr, s"datatype ${s.dataType} is not supported", expr) None } case _ => - withInfo( + withFallbackReason( windowExpr, s"aggregate ${agg.aggregateFunction}" + " is not supported for window function", @@ -311,7 +311,9 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { val partitionColumnNames = partitionSpec.collect { case a: AttributeReference => a.name case other => - withInfo(op, s"Unsupported partition expression: ${other.getClass.getSimpleName}") + withFallbackReason( + op, + s"Unsupported partition expression: ${other.getClass.getSimpleName}") return false } @@ -319,7 +321,7 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { s.child match { case a: AttributeReference => a.name case other => - withInfo(op, s"Unsupported sort expression: ${other.getClass.getSimpleName}") + withFallbackReason(op, s"Unsupported sort expression: ${other.getClass.getSimpleName}") return false } } @@ -327,7 +329,7 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { if (partitionColumnNames.zip(orderColumnNames).exists { case (partCol, orderCol) => partCol != orderCol }) { - withInfo(op, "Partitioning and sorting specifications must be the same.") + withFallbackReason(op, "Partitioning and sorting specifications must be the same.") return false } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala index 16e7a8b774..ee8b716ea3 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala @@ -50,7 +50,7 @@ import com.google.common.base.Objects import org.apache.comet.{CometConf, CometExplainInfo} import org.apache.comet.CometConf.{COMET_EXEC_SHUFFLE_ENABLED, COMET_SHUFFLE_MODE} -import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, isCometShuffleManagerEnabled, withInfos} +import org.apache.comet.CometSparkSessionExtensions.{hasFallbackReason, isCometShuffleManagerEnabled, withFallbackReasons} import org.apache.comet.serde.{Compatible, OperatorOuterClass, QueryPlanSerde, SupportLevel, Unsupported} import org.apache.comet.serde.operator.CometSink import org.apache.comet.shims.{CometTypeShim, ShimCometShuffleExchangeExec} @@ -265,7 +265,8 @@ object CometShuffleExchangeExec /** * Decide which Comet shuffle path (if any) can handle this shuffle. Returns `None` if neither * native nor columnar shuffle can be used; in that case the node is tagged with the combined - * fallback reasons via `withInfos` so subsequent passes short-circuit via `hasExplainInfo`. + * fallback reasons via `withFallbackReasons` so subsequent passes short-circuit via + * `hasFallbackReason`. * * This is the single coordination point: the two path-specific predicates * (`nativeShuffleFailureReasons` / `columnarShuffleFailureReasons`) are pure - they return @@ -276,11 +277,11 @@ object CometShuffleExchangeExec // shuffle falls back to Spark and tagged it. Preserve that decision - re-deriving it against // a possibly-reshaped subtree (e.g. AQE stage-wrapping) can flip the answer and produce // inconsistent plans across passes (see #3949). - if (hasExplainInfo(s)) return None + if (hasFallbackReason(s)) return None isCometShuffleEnabledReason(s) match { case Some(reason) => - withInfos(s, Set(reason)) + withFallbackReasons(s, Set(reason)) return None case None => } @@ -291,7 +292,7 @@ object CometShuffleExchangeExec // On 3.5+ with AQE DPP, the scan converts to CometNativeScanExec and // stageContainsDPPScan won't match (it checks FileSourceScanExec). if (stageContainsDPPScan(s)) { - withInfos(s, Set("Stage contains a scan with Dynamic Partition Pruning")) + withFallbackReasons(s, Set("Stage contains a scan with Dynamic Partition Pruning")) return None } @@ -305,7 +306,7 @@ object CometShuffleExchangeExec if (!isCometPlan(s.child) && !CometConf.COMET_EXEC_SHUFFLE_CONVERT_FROM_SPARK_PLAN_ENABLED.get(s.conf)) { - withInfos( + withFallbackReasons( s, Set( s"${CometConf.COMET_EXEC_SHUFFLE_CONVERT_FROM_SPARK_PLAN_ENABLED.key} is disabled " + @@ -319,7 +320,7 @@ object CometShuffleExchangeExec } val combined = (nativeReasons ++ columnarReasons).toSet - if (combined.nonEmpty) withInfos(s, combined) + if (combined.nonEmpty) withFallbackReasons(s, combined) None } @@ -444,7 +445,7 @@ object CometShuffleExchangeExec reasons += s"unsupported range partitioning sort order: $o" // Roll up fallback reasons recorded on the sort-order expression (e.g. strict // floating-point sort) so they surface in the shuffle's explain output. - o.getTagValue(CometExplainInfo.EXTENSION_INFO).foreach(reasons ++= _) + o.getTagValue(CometExplainInfo.FALLBACK_REASONS).foreach(reasons ++= _) } } for (dt <- orderings.map(_.dataType).distinct) { diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala index 7d5398ae62..8cbf7c9189 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala @@ -51,7 +51,7 @@ import com.google.common.base.Objects import com.google.protobuf.CodedOutputStream import org.apache.comet.{CometConf, CometExecIterator, CometRuntimeException, ConfigEntry} -import org.apache.comet.CometSparkSessionExtensions.{isCometShuffleEnabled, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{isCometShuffleEnabled, withFallbackReason} import org.apache.comet.parquet.CometParquetUtils import org.apache.comet.rules.CometExecRule import org.apache.comet.serde.{CometOperatorSerde, Compatible, Incompatible, OperatorOuterClass, SupportLevel, Unsupported} @@ -823,7 +823,7 @@ object CometProjectExec extends CometOperatorSerde[ProjectExec] { .addAllProjectList(exprs.map(_.get).asJava) Some(builder.setProjection(projectBuilder).build()) } else { - withInfo(op, op.projectList: _*) + withFallbackReason(op, op.projectList: _*) None } } @@ -883,7 +883,7 @@ object CometFilterExec extends CometOperatorSerde[FilterExec] { .setPredicate(cond.get) Some(builder.setFilter(filterBuilder).build()) } else { - withInfo(op, op.condition, op.child) + withFallbackReason(op, op.condition, op.child) None } } @@ -944,7 +944,7 @@ object CometSortExec extends CometOperatorSerde[SortExec] { builder: Operator.Builder, childOp: Operator*): Option[OperatorOuterClass.Operator] = { if (!supportedSortType(op, op.sortOrder)) { - withInfo(op, "Unsupported data type in sort expressions") + withFallbackReason(op, "Unsupported data type in sort expressions") return None } @@ -956,7 +956,7 @@ object CometSortExec extends CometOperatorSerde[SortExec] { .addAllSortOrders(sortOrders.map(_.get).asJava) Some(builder.setSort(sortBuilder).build()) } else { - withInfo(op, "sort order not supported", op.sortOrder: _*) + withFallbackReason(op, "sort order not supported", op.sortOrder: _*) None } } @@ -1029,7 +1029,7 @@ object CometLocalLimitExec extends CometOperatorSerde[LocalLimitExec] { .setOffset(0) Some(builder.setLimit(limitBuilder).build()) } else { - withInfo(op, "No child operator") + withFallbackReason(op, "No child operator") None } } @@ -1090,7 +1090,7 @@ object CometGlobalLimitExec extends CometOperatorSerde[GlobalLimitExec] { Some(builder.setLimit(limitBuilder).build()) } else { - withInfo(op, "No child operator") + withFallbackReason(op, "No child operator") None } } @@ -1159,7 +1159,7 @@ object CometExpandExec extends CometOperatorSerde[ExpandExec] { .setNumExprPerProject(op.projections.head.size) Some(builder.setExpand(expandBuilder).build()) } else { - withInfo(op, allProjExprs: _*) + withFallbackReason(op, allProjExprs: _*) None } } @@ -1244,7 +1244,7 @@ object CometExplodeExec extends CometOperatorSerde[GenerateExec] { val childExprProto = exprToProto(childExpr, op.child.output) if (childExprProto.isEmpty) { - withInfo(op, childExpr) + withFallbackReason(op, childExpr) return None } @@ -1256,7 +1256,7 @@ object CometExplodeExec extends CometOperatorSerde[GenerateExec] { } if (projectExprs.exists(_.isEmpty) || childOp.isEmpty) { - withInfo(op, op.output: _*) + withFallbackReason(op, op.output: _*) return None } @@ -1422,13 +1422,15 @@ trait CometBaseAggregate { val sparkFinalMode = modes.contains(Final) && findCometPartialAgg(aggregate.child).isEmpty if (multiMode) { - withInfo(aggregate, s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") + withFallbackReason( + aggregate, + s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") return None } if (sparkFinalMode && !QueryPlanSerde.allAggsSupportMixedExecution(aggregate.aggregateExpressions)) { - withInfo( + withFallbackReason( aggregate, "Spark Final aggregate without Comet Partial requires compatible " + "intermediate buffer formats") @@ -1439,7 +1441,7 @@ trait CometBaseAggregate { // (Comet partial + Spark final with incompatible intermediate buffers) val unsafeReason = aggregate.getTagValue(CometExecRule.COMET_UNSAFE_PARTIAL) if (unsafeReason.isDefined) { - withInfo(aggregate, unsafeReason.get) + withFallbackReason(aggregate, unsafeReason.get) return None } @@ -1450,12 +1452,12 @@ trait CometBaseAggregate { val child = aggregate.child if (groupingExpressions.isEmpty && aggregateExpressions.isEmpty) { - withInfo(aggregate, "No group by or aggregation") + withFallbackReason(aggregate, "No group by or aggregation") return None } if (groupingExpressions.exists(expr => QueryPlanSerde.containsMapType(expr.dataType))) { - withInfo(aggregate, "Grouping on map-containing types is not supported") + withFallbackReason(aggregate, "Grouping on map-containing types is not supported") return None } @@ -1463,7 +1465,7 @@ trait CometBaseAggregate { // Collation-aware grouping requires collation-aware hashing/equality; Comet only // compares raw bytes, which would put rows that compare equal under the collation // into different groups. - withInfo(aggregate, "Grouping on non-default collated strings is not supported") + withFallbackReason(aggregate, "Grouping on non-default collated strings is not supported") return None } @@ -1475,7 +1477,9 @@ trait CometBaseAggregate { } if (emptyExprs.nonEmpty) { - withInfo(aggregate, s"Unsupported group expressions: ${emptyExprs.mkString(", ")}") + withFallbackReason( + aggregate, + s"Unsupported group expressions: ${emptyExprs.mkString(", ")}") return None } @@ -1501,7 +1505,7 @@ trait CometBaseAggregate { val attributes = groupingExpressions.map(_.toAttribute) ++ aggregateAttributes val resultExprs = resultExpressions.map(exprToProto(_, attributes)) if (resultExprs.exists(_.isEmpty)) { - withInfo( + withFallbackReason( aggregate, s"Unsupported result expressions found in: $resultExpressions", resultExpressions: _*) @@ -1517,7 +1521,9 @@ trait CometBaseAggregate { // - Mixed {Partial, PartialMerge} (for distinct aggregate plans) val isMixedPartialMerge = modeSet == Set(Partial, PartialMerge) if (modes.size > 1 && !isMixedPartialMerge) { - withInfo(aggregate, s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") + withFallbackReason( + aggregate, + s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") return None } @@ -1532,7 +1538,7 @@ trait CometBaseAggregate { case Final => CometAggregateMode.Final case PartialMerge => CometAggregateMode.PartialMerge case _ => - withInfo(aggregate, s"Unsupported aggregation mode ${modes.head}") + withFallbackReason(aggregate, s"Unsupported aggregation mode ${modes.head}") return None } } @@ -1548,7 +1554,7 @@ trait CometBaseAggregate { a.aggregateFunction.isInstanceOf[Last]) } if (unsupportedAggs.nonEmpty) { - withInfo( + withFallbackReason( aggregate, "PartialMerge not supported for aggregates: " + unsupportedAggs.map(_.aggregateFunction.prettyName).mkString(", ")) @@ -1565,7 +1571,7 @@ trait CometBaseAggregate { } if (aggExprs.exists(_.isEmpty)) { - withInfo( + withFallbackReason( aggregate, "Unsupported aggregate expression(s)", aggregateExpressions ++ aggregateExpressions.map(_.aggregateFunction): _*) @@ -1581,7 +1587,7 @@ trait CometBaseAggregate { val attributes = groupingExpressions.map(_.toAttribute) ++ aggregateAttributes val resultExprs = resultExpressions.map(exprToProto(_, attributes)) if (resultExprs.exists(_.isEmpty)) { - withInfo( + withFallbackReason( aggregate, s"Unsupported result expressions found in: $resultExpressions", resultExpressions: _*) @@ -1599,7 +1605,7 @@ trait CometBaseAggregate { case PartialMerge => CometAggregateMode.PartialMerge case Final => CometAggregateMode.Final case other => - withInfo(aggregate, s"Unsupported aggregation mode $other") + withFallbackReason(aggregate, s"Unsupported aggregation mode $other") return None } } @@ -1611,7 +1617,7 @@ trait CometBaseAggregate { } else { val allChildren: Seq[Expression] = groupingExpressions ++ aggregateExpressions ++ aggregateAttributes - withInfo(aggregate, allChildren: _*) + withFallbackReason(aggregate, allChildren: _*) None } } @@ -1840,7 +1846,7 @@ trait CometHashJoin { join.isInstanceOf[ShuffledHashJoinExec]) && !(CometConf.COMET_EXEC_BROADCAST_HASH_JOIN_ENABLED.get(join.conf) && join.isInstanceOf[BroadcastHashJoinExec])) { - withInfo(join, s"Invalid hash join type ${join.nodeName}") + withFallbackReason(join, s"Invalid hash join type ${join.nodeName}") return None } @@ -1852,7 +1858,7 @@ trait CometHashJoin { val joinKeys = join.leftKeys ++ join.rightKeys if (joinKeys.exists(key => isStringCollationType(key.dataType))) { - withInfo(join, "unsupported non-default collated string join keys") + withFallbackReason(join, "unsupported non-default collated string join keys") return None } @@ -1863,7 +1869,7 @@ trait CometHashJoin { (join.leftKeys.length != 1 || join.rightKeys.length != 1 || join.joinType != LeftAnti || join.buildSide != BuildRight || join.condition.isDefined)) { - withInfo( + withFallbackReason( join, "null-aware anti-join requires single-column LeftAnti BuildRight with no condition") return None @@ -1872,7 +1878,7 @@ trait CometHashJoin { val condition = join.condition.map { cond => val condProto = exprToProto(cond, join.left.output ++ join.right.output) if (condProto.isEmpty) { - withInfo(join, cond) + withFallbackReason(join, cond) return None } condProto.get @@ -1889,7 +1895,7 @@ trait CometHashJoin { case LeftAnti => JoinType.LeftAnti case _ => // Spark doesn't support other join types - withInfo(join, s"Unsupported join type ${join.joinType}") + withFallbackReason(join, s"Unsupported join type ${join.joinType}") return None } } @@ -1912,7 +1918,7 @@ trait CometHashJoin { Some(builder.setHashJoin(joinBuilder).build()) } else { val allExprs: Seq[Expression] = joinKeys - withInfo(join, allExprs: _*) + withFallbackReason(join, allExprs: _*) None } } @@ -2200,7 +2206,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { if (join.condition.isDefined && !CometConf.COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED .get(join.conf)) { - withInfo( + withFallbackReason( join, s"${CometConf.COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED.key} is not enabled", join.condition.get) @@ -2210,7 +2216,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { val condition = join.condition.map { cond => val condProto = exprToProto(cond, join.left.output ++ join.right.output) if (condProto.isEmpty) { - withInfo(join, cond) + withFallbackReason(join, cond) return None } condProto.get @@ -2227,14 +2233,14 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { case LeftAnti => JoinType.LeftAnti case _ => // Spark doesn't support other join types - withInfo(join, s"Unsupported join type ${join.joinType}") + withFallbackReason(join, s"Unsupported join type ${join.joinType}") return None } } val joinKeys = join.leftKeys ++ join.rightKeys if (joinKeys.exists(key => isStringCollationType(key.dataType))) { - withInfo(join, "unsupported non-default collated string join keys") + withFallbackReason(join, "unsupported non-default collated string join keys") return None } @@ -2248,7 +2254,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { } if (errorMsgs.nonEmpty) { - withInfo(join, errorMsgs.mkString("\n")) + withFallbackReason(join, errorMsgs.mkString("\n")) return None } @@ -2272,7 +2278,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { Some(builder.setSortMergeJoin(joinBuilder).build()) } else { val allExprs: Seq[Expression] = joinKeys - withInfo(join, allExprs: _*) + withFallbackReason(join, allExprs: _*) None } } diff --git a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala index d3e3270700..1444f3e669 100644 --- a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala @@ -23,11 +23,11 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.Sum import org.apache.spark.sql.types.DataTypes -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { @@ -85,7 +85,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) case _ => None } diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 868b09de9d..85a8e9b292 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -29,11 +29,11 @@ import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, DataTypes, MapType, StringType} import org.apache.comet.{CometConf, CometExplainInfo} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { returnType, false, arrayExprProto) - optExprWithInfo(scalarExpr, knc, arrayChild) + optExprWithFallbackReason(scalarExpr, knc, arrayChild) case _ => exprToProtoInternal(knc.child, inputs, binding) } case _ => exprToProtoInternal(knc.child, inputs, binding) @@ -122,7 +122,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { @@ -132,7 +132,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) // In Spark 4.x, RuntimeReplaceable expressions (StructsToJson, ParseUrl) become // Invoke(Literal(Evaluator), "evaluate", ...). Reconstruct the original expression @@ -144,8 +144,8 @@ trait CometExprShim extends CommonStringExprs { val exprProto = exprToProtoInternal(toJson, inputs, binding) if (exprProto.isEmpty) { toJson - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } exprProto case (Literal(evaluator: ParseUrlEvaluator, _), "evaluate", args) => @@ -153,8 +153,8 @@ trait CometExprShim extends CommonStringExprs { val result = exprToProtoInternal(parseUrl, inputs, binding) if (result.isEmpty) { parseUrl - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } result case _ => None @@ -163,11 +163,11 @@ trait CometExprShim extends CommonStringExprs { case ms: MapSort => val keyType = ms.dataType.asInstanceOf[MapType].keyType if (!supportedScalarSortElementType(keyType)) { - withInfo(ms, s"MapSort on map with key type $keyType is not supported") + withFallbackReason(ms, s"MapSort on map with key type $keyType is not supported") None } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && SupportLevel.containsFloatingPoint(keyType)) { - withInfo( + withFallbackReason( ms, "MapSort on floating-point key is not 100% compatible with Spark, and Comet is " + s"running with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. " + @@ -180,7 +180,7 @@ trait CometExprShim extends CommonStringExprs { ms.dataType, failOnError = false, childExpr) - optExprWithInfo(mapSortExpr, ms, ms.child) + optExprWithFallbackReason(mapSortExpr, ms, ms.child) } case _ => None diff --git a/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala index 676cb468b4..1e31360eb0 100644 --- a/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala @@ -30,11 +30,11 @@ import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, DataTypes, MapType, StringType, TimeType} import org.apache.comet.{CometConf, CometExplainInfo} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { returnType, false, arrayExprProto) - optExprWithInfo(scalarExpr, knc, arrayChild) + optExprWithFallbackReason(scalarExpr, knc, arrayChild) case _ => exprToProtoInternal(knc.child, inputs, binding) } case _ => exprToProtoInternal(knc.child, inputs, binding) @@ -102,7 +102,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = s.arguments.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("make_time", s.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, expr, s.arguments: _*) + optExprWithFallbackReason(optExpr, expr, s.arguments: _*) case expr @ ToPrettyString(child, timeZoneId) => val castSupported = CometCast.isSupported( @@ -132,7 +132,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { @@ -142,7 +142,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) // In Spark 4.x, RuntimeReplaceable expressions (StructsToJson, ParseUrl) become // Invoke(Literal(Evaluator), "evaluate", ...). Reconstruct the original expression @@ -154,8 +154,8 @@ trait CometExprShim extends CommonStringExprs { val exprProto = exprToProtoInternal(toJson, inputs, binding) if (exprProto.isEmpty) { toJson - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } exprProto case (Literal(evaluator: ParseUrlEvaluator, _), "evaluate", args) => @@ -163,8 +163,8 @@ trait CometExprShim extends CommonStringExprs { val result = exprToProtoInternal(parseUrl, inputs, binding) if (result.isEmpty) { parseUrl - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } result case (Literal(parser: ToTimeParser, _), "parse", args) @@ -172,7 +172,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = args.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("to_time", i.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, i, args: _*) + optExprWithFallbackReason(optExpr, i, args: _*) case _ => None } @@ -187,18 +187,18 @@ trait CometExprShim extends CommonStringExprs { i.dataType, false, childExprs: _*) - optExprWithInfo(optExpr, expr, args: _*) + optExprWithFallbackReason(optExpr, expr, args: _*) case _ => None } case ms: MapSort => val keyType = ms.dataType.asInstanceOf[MapType].keyType if (!supportedScalarSortElementType(keyType)) { - withInfo(ms, s"MapSort on map with key type $keyType is not supported") + withFallbackReason(ms, s"MapSort on map with key type $keyType is not supported") None } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && SupportLevel.containsFloatingPoint(keyType)) { - withInfo( + withFallbackReason( ms, "MapSort on floating-point key is not 100% compatible with Spark, and Comet is " + s"running with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. " + @@ -211,7 +211,7 @@ trait CometExprShim extends CommonStringExprs { ms.dataType, failOnError = false, childExpr) - optExprWithInfo(mapSortExpr, ms, ms.child) + optExprWithFallbackReason(mapSortExpr, ms, ms.child) } case _ => None diff --git a/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala index 676cb468b4..1e31360eb0 100644 --- a/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala @@ -30,11 +30,11 @@ import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, DataTypes, MapType, StringType, TimeType} import org.apache.comet.{CometConf, CometExplainInfo} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { returnType, false, arrayExprProto) - optExprWithInfo(scalarExpr, knc, arrayChild) + optExprWithFallbackReason(scalarExpr, knc, arrayChild) case _ => exprToProtoInternal(knc.child, inputs, binding) } case _ => exprToProtoInternal(knc.child, inputs, binding) @@ -102,7 +102,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = s.arguments.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("make_time", s.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, expr, s.arguments: _*) + optExprWithFallbackReason(optExpr, expr, s.arguments: _*) case expr @ ToPrettyString(child, timeZoneId) => val castSupported = CometCast.isSupported( @@ -132,7 +132,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { @@ -142,7 +142,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) // In Spark 4.x, RuntimeReplaceable expressions (StructsToJson, ParseUrl) become // Invoke(Literal(Evaluator), "evaluate", ...). Reconstruct the original expression @@ -154,8 +154,8 @@ trait CometExprShim extends CommonStringExprs { val exprProto = exprToProtoInternal(toJson, inputs, binding) if (exprProto.isEmpty) { toJson - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } exprProto case (Literal(evaluator: ParseUrlEvaluator, _), "evaluate", args) => @@ -163,8 +163,8 @@ trait CometExprShim extends CommonStringExprs { val result = exprToProtoInternal(parseUrl, inputs, binding) if (result.isEmpty) { parseUrl - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } result case (Literal(parser: ToTimeParser, _), "parse", args) @@ -172,7 +172,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = args.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("to_time", i.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, i, args: _*) + optExprWithFallbackReason(optExpr, i, args: _*) case _ => None } @@ -187,18 +187,18 @@ trait CometExprShim extends CommonStringExprs { i.dataType, false, childExprs: _*) - optExprWithInfo(optExpr, expr, args: _*) + optExprWithFallbackReason(optExpr, expr, args: _*) case _ => None } case ms: MapSort => val keyType = ms.dataType.asInstanceOf[MapType].keyType if (!supportedScalarSortElementType(keyType)) { - withInfo(ms, s"MapSort on map with key type $keyType is not supported") + withFallbackReason(ms, s"MapSort on map with key type $keyType is not supported") None } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && SupportLevel.containsFloatingPoint(keyType)) { - withInfo( + withFallbackReason( ms, "MapSort on floating-point key is not 100% compatible with Spark, and Comet is " + s"running with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. " + @@ -211,7 +211,7 @@ trait CometExprShim extends CommonStringExprs { ms.dataType, failOnError = false, childExpr) - optExprWithInfo(mapSortExpr, ms, ms.child) + optExprWithFallbackReason(mapSortExpr, ms, ms.child) } case _ => None diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index a172538f45..4f39d46e30 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -938,7 +938,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - test("withInfo") { + test("withFallbackReason") { val table = "with_info" withTable(table) { sql(s"create table $table(id int, name varchar(20)) using parquet") @@ -947,14 +947,14 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { val (_, cometPlan) = checkSparkAnswerAndOperator(query) val project = stripAQEPlan(cometPlan).collectFirst { case p: CometProjectExec => p }.get val id = project.expressions.head - CometSparkSessionExtensions.withInfo(id, "reason 1") - CometSparkSessionExtensions.withInfo(project, "reason 2") - CometSparkSessionExtensions.withInfo(project, "reason 3", id) - CometSparkSessionExtensions.withInfo(project, id) - CometSparkSessionExtensions.withInfo(project, "reason 4") - CometSparkSessionExtensions.withInfo(project, "reason 5", id) - CometSparkSessionExtensions.withInfo(project, id) - CometSparkSessionExtensions.withInfo(project, "reason 6") + CometSparkSessionExtensions.withFallbackReason(id, "reason 1") + CometSparkSessionExtensions.withFallbackReason(project, "reason 2") + CometSparkSessionExtensions.withFallbackReason(project, "reason 3", id) + CometSparkSessionExtensions.withFallbackReason(project, id) + CometSparkSessionExtensions.withFallbackReason(project, "reason 4") + CometSparkSessionExtensions.withFallbackReason(project, "reason 5", id) + CometSparkSessionExtensions.withFallbackReason(project, id) + CometSparkSessionExtensions.withFallbackReason(project, "reason 6") val explain = new ExtendedExplainInfo().generateExtendedInfo(project) for (i <- 1 until 7) { assert(explain.contains(s"reason $i")) diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala index 56a1b44070..f672ebc082 100644 --- a/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala @@ -209,7 +209,7 @@ class CometDppFallbackRepro3949Suite extends CometTestBase { def walk(p: SparkPlan): Unit = { p match { case s: CometShuffleExchangeExec => - val tags = s.getTagValue(CometExplainInfo.EXTENSION_INFO).getOrElse(Set.empty[String]) + val tags = s.getTagValue(CometExplainInfo.FALLBACK_REASONS).getOrElse(Set.empty[String]) if (tags.exists(_.contains("Dynamic Partition Pruning"))) acc += s case _ => } diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala index b671e04042..23f5ad5d41 100644 --- a/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{hasFallbackReason, withFallbackReason} /** * Pins the sticky-fallback invariant for Comet shuffle decisions: `shuffleSupported` must return @@ -43,14 +43,14 @@ import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, withInfo} * at initial planning and then convert to Comet at stage prep, producing plan-shape * inconsistencies across the two passes (suspected mechanism behind #3949). * - * The coordinator tags the node with `withInfos` only on total fallback and short-circuits via - * `hasExplainInfo` on subsequent passes. + * The coordinator tags the node with `withFallbackReasons` only on total fallback and + * short-circuits via `hasFallbackReason` on subsequent passes. */ class CometShuffleFallbackStickinessSuite extends CometTestBase { test("shuffleSupported returns None when the shuffle already carries explain info") { val shuffle = ShuffleExchangeExec(SinglePartition, SyntheticLeaf(Nil)) - withInfo(shuffle, "pretend prior pass decided Spark fallback") + withFallbackReason(shuffle, "pretend prior pass decided Spark fallback") assert( CometShuffleExchangeExec.shuffleSupported(shuffle).isEmpty, @@ -109,7 +109,7 @@ class CometShuffleFallbackStickinessSuite extends CometTestBase { // Pass 1: real DPP subtree visible. Returns None AND tags the shuffle. val first = CometShuffleExchangeExec.shuffleSupported(shuffle) assert(first.isEmpty, "initial pass must fall back (DPP visible)") - assert(hasExplainInfo(shuffle), "fallback reason must be tagged on the shuffle") + assert(hasFallbackReason(shuffle), "fallback reason must be tagged on the shuffle") // Pass 2 simulates AQE stage-prep: replace the child with an opaque leaf that hides // the DPP subtree from tree walks. A naive `.exists`-based check would flip to "convert" diff --git a/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala b/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala index 4623f4591e..2c8451fc36 100644 --- a/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala +++ b/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala @@ -110,7 +110,7 @@ class CometCollationSuite extends CometTestBase { } private def assertFallbackReason(plan: SparkPlan, expectedReason: String): Unit = { - val reasons = plan.getTagValue(CometExplainInfo.EXTENSION_INFO).getOrElse(Set.empty[String]) + val reasons = plan.getTagValue(CometExplainInfo.FALLBACK_REASONS).getOrElse(Set.empty[String]) assert( reasons.contains(expectedReason), s"Expected fallback reason '$expectedReason' on ${plan.nodeName}, got: $reasons")