From 40fbd7d8ac59bb6ff2a5216d8eaa28c366426402 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 28 May 2026 18:17:20 -0600 Subject: [PATCH 1/2] refactor: rename withInfo to withFallbackReason for clarity Rename withInfo/withInfos/hasExplainInfo and EXTENSION_INFO to withFallbackReason/withFallbackReasons/hasFallbackReason and FALLBACK_REASONS to match their actual semantics (fallback reasons, not generic info). Also rename the private extensionInfo helper in ExtendedExplainInfo to fallbackReasons, and update the TreeNodeTag string from "CometExtensionInfo" to "CometFallbackReasons" so a future PR can reuse the old string for a distinct tag. --- .../comet/CometSparkSessionExtensions.scala | 31 +++---- .../apache/comet/ExtendedExplainInfo.scala | 12 +-- .../codegen/CometBatchKernelCodegen.scala | 4 +- .../apache/comet/expressions/CometCast.scala | 6 +- .../apache/comet/rules/CometExecRule.scala | 20 ++--- .../apache/comet/rules/CometScanRule.scala | 50 ++++++------ .../org/apache/comet/rules/RewriteJoin.scala | 4 +- .../serde/CometBloomFilterMightContain.scala | 4 +- .../apache/comet/serde/CometScalaUDF.scala | 18 +++-- .../comet/serde/CometScalarSubquery.scala | 8 +- .../apache/comet/serde/CometSortOrder.scala | 4 +- .../apache/comet/serde/QueryPlanSerde.scala | 32 ++++---- .../org/apache/comet/serde/aggregates.scala | 72 ++++++++--------- .../org/apache/comet/serde/arithmetic.scala | 24 +++--- .../scala/org/apache/comet/serde/arrays.scala | 33 ++++---- .../org/apache/comet/serde/conditional.scala | 12 +-- .../comet/serde/contraintExpressions.scala | 4 +- .../org/apache/comet/serde/datetime.scala | 22 ++--- .../scala/org/apache/comet/serde/hash.scala | 12 +-- .../org/apache/comet/serde/literals.scala | 6 +- .../scala/org/apache/comet/serde/math.scala | 8 +- .../apache/comet/serde/namedExpressions.scala | 8 +- .../operator/CometDataWritingCommand.scala | 8 +- .../serde/operator/CometNativeScan.scala | 18 +++-- .../comet/serde/operator/CometSink.scala | 10 +-- .../org/apache/comet/serde/predicates.scala | 4 +- .../org/apache/comet/serde/statics.scala | 4 +- .../org/apache/comet/serde/strings.scala | 32 ++++---- .../org/apache/comet/serde/structs.scala | 20 ++--- .../org/apache/comet/serde/unixtime.scala | 6 +- .../spark/sql/comet/CometWindowExec.scala | 20 ++--- .../shuffle/CometShuffleExchangeExec.scala | 17 ++-- .../apache/spark/sql/comet/operators.scala | 80 ++++++++++--------- .../apache/comet/shims/CometExprShim.scala | 4 +- .../apache/comet/shims/CometExprShim.scala | 16 ++-- .../apache/comet/shims/CometExprShim.scala | 16 ++-- .../apache/comet/shims/CometExprShim.scala | 16 ++-- .../apache/comet/CometExpressionSuite.scala | 18 ++--- .../CometDppFallbackRepro3949Suite.scala | 2 +- .../CometShuffleFallbackStickinessSuite.scala | 10 +-- .../spark/sql/CometCollationSuite.scala | 2 +- 41 files changed, 364 insertions(+), 333 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala index 1ae90e1845..6c4a92f312 100644 --- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala +++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala @@ -290,21 +290,22 @@ object CometSparkSessionExtensions extends Logging { * @return * `node` with fallback reasons attached (as a side effect on its tag map). */ - def withInfo[T <: TreeNode[_]](node: T, info: String, exprs: T*): T = { + def withFallbackReason[T <: TreeNode[_]](node: T, info: String, exprs: T*): T = { // support existing approach of passing in multiple infos in a newline-delimited string val infoSet = if (info == null || info.isEmpty) { Set.empty[String] } else { info.split("\n").toSet } - withInfos(node, infoSet, exprs: _*) + withFallbackReasons(node, infoSet, exprs: _*) } /** * Record one or more fallback reasons on a `TreeNode` and roll up reasons from any child nodes. - * This is the set-valued form of [[withInfo]]; see that overload for the full contract. + * This is the set-valued form of [[withFallbackReason]]; see that overload for the full + * contract. * - * Reasons are accumulated (never overwritten) on the node's `EXTENSION_INFO` tag and are + * Reasons are accumulated (never overwritten) on the node's `FALLBACK_REASONS` tag and are * surfaced in extended explain output. When `COMET_LOG_FALLBACK_REASONS` is enabled, each new * reason is also emitted as a warning. * @@ -320,16 +321,16 @@ object CometSparkSessionExtensions extends Logging { * @return * `node` with fallback reasons attached (as a side effect on its tag map). */ - def withInfos[T <: TreeNode[_]](node: T, info: Set[String], exprs: T*): T = { + def withFallbackReasons[T <: TreeNode[_]](node: T, info: Set[String], exprs: T*): T = { if (CometConf.COMET_LOG_FALLBACK_REASONS.get()) { for (reason <- info) { logWarning(s"Comet cannot accelerate ${node.getClass.getSimpleName} because: $reason") } } - val existingNodeInfos = node.getTagValue(CometExplainInfo.EXTENSION_INFO) + val existingNodeInfos = node.getTagValue(CometExplainInfo.FALLBACK_REASONS) val newNodeInfo = (existingNodeInfos ++ exprs - .flatMap(_.getTagValue(CometExplainInfo.EXTENSION_INFO))).flatten.toSet - node.setTagValue(CometExplainInfo.EXTENSION_INFO, newNodeInfo ++ info) + .flatMap(_.getTagValue(CometExplainInfo.FALLBACK_REASONS))).flatten.toSet + node.setTagValue(CometExplainInfo.FALLBACK_REASONS, newNodeInfo ++ info) node } @@ -347,17 +348,17 @@ object CometSparkSessionExtensions extends Logging { * @return * `node` with the rolled-up reasons attached (as a side effect on its tag map). */ - def withInfo[T <: TreeNode[_]](node: T, exprs: T*): T = { - withInfos(node, Set.empty, exprs: _*) + def withFallbackReason[T <: TreeNode[_]](node: T, exprs: T*): T = { + withFallbackReasons(node, Set.empty, exprs: _*) } /** - * True if any fallback reason has been recorded on `node` (via [[withInfo]] / [[withInfos]]). - * Callers that need to short-circuit when a prior rule pass has already decided a node falls - * back can use this as the sticky signal. + * True if any fallback reason has been recorded on `node` (via [[withFallbackReason]] / + * [[withFallbackReasons]]). Callers that need to short-circuit when a prior rule pass has + * already decided a node falls back can use this as the sticky signal. */ - def hasExplainInfo(node: TreeNode[_]): Boolean = { - node.getTagValue(CometExplainInfo.EXTENSION_INFO).exists(_.nonEmpty) + def hasFallbackReason(node: TreeNode[_]): Boolean = { + node.getTagValue(CometExplainInfo.FALLBACK_REASONS).exists(_.nonEmpty) } } diff --git a/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala b/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala index d30a1fe788..592b1955f2 100644 --- a/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala +++ b/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala @@ -50,15 +50,17 @@ class ExtendedExplainInfo extends ExtendedExplainGenerator { } def getFallbackReasons(plan: SparkPlan): Seq[String] = { - extensionInfo(plan).toSeq.sorted + fallbackReasons(plan).toSeq.sorted } - private[comet] def extensionInfo(node: TreeNode[_]): Set[String] = { + private[comet] def fallbackReasons(node: TreeNode[_]): Set[String] = { var info = mutable.Seq[String]() val sorted = sortup(node) sorted.foreach { p => val all: Set[String] = - getActualPlan(p).getTagValue(CometExplainInfo.EXTENSION_INFO).getOrElse(Set.empty[String]) + getActualPlan(p) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .getOrElse(Set.empty[String]) for (s <- all) { info = info :+ s } @@ -120,7 +122,7 @@ class ExtendedExplainInfo extends ExtendedExplainGenerator { outString.append(if (lastChildren.last) "+- " else ":- ") } - val tagValue = node.getTagValue(CometExplainInfo.EXTENSION_INFO) + val tagValue = node.getTagValue(CometExplainInfo.FALLBACK_REASONS) val str = if (tagValue.nonEmpty) { s" ${node.nodeName} [COMET: ${tagValue.get.mkString(", ")}]" } else { @@ -212,7 +214,7 @@ object CometCoverageStats { } object CometExplainInfo { - val EXTENSION_INFO = new TreeNodeTag[Set[String]]("CometExtensionInfo") + val FALLBACK_REASONS = new TreeNodeTag[Set[String]]("CometFallbackReasons") def getActualPlan(node: TreeNode[_]): TreeNode[_] = { node match { diff --git a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala index 042fd9ced3..6e77182cac 100644 --- a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala +++ b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala @@ -103,8 +103,8 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim { /** * Plan-time predicate. `None` greenlights the serde to emit the codegen proto; `Some(reason)` - * forces a Spark fallback (typically `withInfo(...) + None`) so the operator falls back cleanly - * rather than crashing the Janino compile at execute time. + * forces a Spark fallback (typically `withFallbackReason(...) + None`) so the operator falls + * back cleanly rather than crashing the Janino compile at execute time. * * Checks every `BoundReference`'s data type and the root `expr.dataType` against * [[isSupportedDataType]], rejects aggregates / generators / `CodegenFallback` (other than diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala index 400229a402..8ecfdfe49c 100644 --- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala +++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, DataType, DataTypes, DecimalType, NullType, StructType, TimestampNTZType, TimestampType} import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.{isSpark40Plus, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{isSpark40Plus, withFallbackReason} import org.apache.comet.serde.{CometExpressionSerde, Compatible, ExprOuterClass, Incompatible, SupportLevel, Unsupported} import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, serializeDataType} @@ -81,7 +81,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { if (childExpr.isDefined) { castToProto(cast, cast.timeZoneId, cast.dataType, childExpr.get, cometEvalMode) } else { - withInfo(cast, cast.child) + withFallbackReason(cast, cast.child) None } } @@ -131,7 +131,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { .setCast(castBuilder) .build()) case _ => - withInfo(expr, s"Unsupported datatype in castToProto: $dt") + withFallbackReason(expr, s"Unsupported datatype in castToProto: $dt") None } } diff --git a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala index aeb7db40ad..d116d2f407 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala @@ -328,8 +328,8 @@ case class CometExecRule(session: SparkSession) } else { // copy fallback reasons to the original plan newPlan - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => withInfos(plan, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => withFallbackReasons(plan, reasons)) // return the original plan plan } @@ -382,8 +382,8 @@ case class CometExecRule(session: SparkSession) // reasons. // 3. The operator has children that could not be converted, so execution // has already fallen back to Spark. - if (op.children.forall(_.isInstanceOf[CometNativeExec]) && !hasExplainInfo(op)) { - withInfo(op, s"${op.nodeName} is not supported") + if (op.children.forall(_.isInstanceOf[CometNativeExec]) && !hasFallbackReason(op)) { + withFallbackReason(op, s"${op.nodeName} is not supported") } else { op } @@ -587,7 +587,7 @@ case class CometExecRule(session: SparkSession) // config is enabled) if (CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.get()) { val info = new ExtendedExplainInfo() - if (info.extensionInfo(newPlan).nonEmpty) { + if (info.fallbackReasons(newPlan).nonEmpty) { logWarning( "Comet cannot execute some parts of this plan natively " + s"(set ${CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key}=false " + @@ -693,7 +693,9 @@ case class CometExecRule(session: SparkSession) case other => Seq(other) } if (!dataProducingChildren.forall(_.isInstanceOf[CometNativeExec])) { - withInfo(op, "Cannot perform native operation because input is not in Arrow format") + withFallbackReason( + op, + "Cannot perform native operation because input is not in Arrow format") return None } } @@ -721,7 +723,7 @@ case class CometExecRule(session: SparkSession) if (handler.enabledConfig.forall(_.get(op.conf))) { handler.getSupportLevel(op) match { case Unsupported(notes) => - withInfo(op, notes.getOrElse("")) + withFallbackReason(op, notes.getOrElse("")) false case Incompatible(notes) => val allowIncompat = CometConf.isOperatorAllowIncompat(opName) @@ -735,7 +737,7 @@ case class CometExecRule(session: SparkSession) true } else { val optionalNotes = notes.map(str => s" ($str)").getOrElse("") - withInfo( + withFallbackReason( op, s"$opName is not fully compatible with Spark$optionalNotes. " + s"To enable it anyway, set $incompatConf=true. " + @@ -749,7 +751,7 @@ case class CometExecRule(session: SparkSession) true } } else { - withInfo( + withFallbackReason( op, s"Native support for operator $opName is disabled. " + s"Set ${handler.enabledConfig.get.key}=true to enable it.") diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala index 7601fa1c6b..6dfcdcff25 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.types._ import org.apache.comet.{CometConf, DataTypeSupport} import org.apache.comet.CometConf._ -import org.apache.comet.CometSparkSessionExtensions.{isCometLoaded, isSpark35Plus, withInfo, withInfos} +import org.apache.comet.CometSparkSessionExtensions.{isCometLoaded, isSpark35Plus, withFallbackReason, withFallbackReasons} import org.apache.comet.DataTypeSupport.isComplexType import org.apache.comet.iceberg.{CometIcebergNativeScanMetadata, IcebergReflection} import org.apache.comet.objectstore.NativeConfig @@ -119,13 +119,13 @@ case class CometScanRule(session: SparkSession) // Tagged by CometSpark34AqeDppFallbackRule on Spark < 3.5 to keep a peer scan // Spark-native for canonical symmetry in SMJ self-joins (SPARK-32509). case scan if scan.getTagValue(CometScanRule.SKIP_COMET_SCAN_TAG).isDefined => - withInfo(scan, "AQE DPP region fallback (Spark < 3.5)") + withFallbackReason(scan, "AQE DPP region fallback (Spark < 3.5)") case scan if !CometConf.COMET_NATIVE_SCAN_ENABLED.get(conf) => - withInfo(scan, "Comet Scan is not enabled") + withFallbackReason(scan, "Comet Scan is not enabled") case scan if hasMetadataCol(scan) => - withInfo(scan, "Metadata column is not supported") + withFallbackReason(scan, "Metadata column is not supported") // data source V1 case scanExec: FileSourceScanExec => @@ -134,7 +134,7 @@ case class CometScanRule(session: SparkSession) // data source V2 case scanExec: BatchScanExec => if (isIcebergMetadataTable(scanExec)) { - withInfo(scanExec, "Iceberg Metadata tables are not supported") + withFallbackReason(scanExec, "Iceberg Metadata tables are not supported") } else { transformV2Scan(scanExec) } @@ -157,13 +157,13 @@ case class CometScanRule(session: SparkSession) // On 3.5+, CometPlanAdaptiveDynamicPruningFilters rewrites SABs directly and this fallback // is not needed. if (!isSpark35Plus && scanExec.partitionFilters.exists(isAqeDynamicPruningFilter)) { - return withInfo(scanExec, "AQE Dynamic Partition Pruning requires Spark 3.5+") + return withFallbackReason(scanExec, "AQE Dynamic Partition Pruning requires Spark 3.5+") } scanExec.relation match { case r: HadoopFsRelation => if (!CometScanExec.isFileFormatSupported(r.fileFormat)) { - return withInfo(scanExec, s"Unsupported file format ${r.fileFormat}") + return withFallbackReason(scanExec, s"Unsupported file format ${r.fileFormat}") } val hadoopConf = r.sparkSession.sessionState.newHadoopConfWithOptions(r.options) @@ -176,7 +176,7 @@ case class CometScanRule(session: SparkSession) // Spark already converted these to Java-native types, so we can't check SQL types. // ArrayBasedMapData, GenericInternalRow, GenericArrayData correspond to maps, structs, // and arrays respectively. - withInfo( + withFallbackReason( scanExec, "Full native scan disabled because default values for nested types are not supported") return scanExec @@ -185,7 +185,7 @@ case class CometScanRule(session: SparkSession) nativeScan(plan, session, scanExec, r, hadoopConf).getOrElse(scanExec) case _ => - withInfo(scanExec, s"Unsupported relation ${scanExec.relation}") + withFallbackReason(scanExec, s"Unsupported relation ${scanExec.relation}") } } @@ -196,7 +196,9 @@ case class CometScanRule(session: SparkSession) r: HadoopFsRelation, hadoopConf: Configuration): Option[SparkPlan] = { if (!COMET_EXEC_ENABLED.get()) { - withInfo(scanExec, s"Native Parquet scan requires ${COMET_EXEC_ENABLED.key} to be enabled") + withFallbackReason( + scanExec, + s"Native Parquet scan requires ${COMET_EXEC_ENABLED.key} to be enabled") return None } // Disabling the vectorized reader opts into parquet-mr's permissive behavior @@ -205,7 +207,7 @@ case class CometScanRule(session: SparkSession) // replace the scan via COMET_SCAN_ALLOW_DISABLED_PARQUET_VECTORIZED_READER. if (!conf.parquetVectorizedReaderEnabled && !COMET_SCAN_ALLOW_DISABLED_PARQUET_VECTORIZED_READER.get()) { - withInfo( + withFallbackReason( scanExec, "Native Parquet scan is incompatible with " + s"${SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key}=false; set " + @@ -216,11 +218,11 @@ case class CometScanRule(session: SparkSession) return None } if (encryptionEnabled(hadoopConf) && !isEncryptionConfigSupported(hadoopConf)) { - withInfo(scanExec, "Native Parquet scan does not support encryption") + withFallbackReason(scanExec, "Native Parquet scan does not support encryption") return None } if (scanExec.fileConstantMetadataColumns.nonEmpty) { - withInfo(scanExec, "Native DataFusion scan does not support metadata columns") + withFallbackReason(scanExec, "Native DataFusion scan does not support metadata columns") return None } // input_file_name, input_file_block_start, and input_file_block_length read from @@ -231,14 +233,14 @@ case class CometScanRule(session: SparkSession) case _: InputFileName | _: InputFileBlockStart | _: InputFileBlockLength => true case _ => false }))) { - withInfo( + withFallbackReason( scanExec, "Native DataFusion scan is not compatible with input_file_name, " + "input_file_block_start, or input_file_block_length") return None } if (ShimFileFormat.findRowIndexColumnIndexInSchema(scanExec.requiredSchema) >= 0) { - withInfo(scanExec, "Native DataFusion scan does not support row index generation") + withFallbackReason(scanExec, "Native DataFusion scan does not support row index generation") return None } if (!isSchemaSupported(scanExec, r)) { @@ -288,7 +290,7 @@ case class CometScanRule(session: SparkSession) scanExec.clone().asInstanceOf[BatchScanExec], runtimeFilters = scanExec.runtimeFilters) } else { - withInfos(scanExec, fallbackReasons.toSet) + withFallbackReasons(scanExec, fallbackReasons.toSet) } // Iceberg scan - detected by class name. SparkStagedScan covers reads issued by @@ -301,13 +303,13 @@ case class CometScanRule(session: SparkSession) if (!COMET_ICEBERG_NATIVE_ENABLED.get()) { fallbackReasons += "Native Iceberg scan disabled because " + s"${COMET_ICEBERG_NATIVE_ENABLED.key} is not enabled" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } if (!COMET_EXEC_ENABLED.get()) { fallbackReasons += "Native Iceberg scan disabled because " + s"${COMET_EXEC_ENABLED.key} is not enabled" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } val typeChecker = CometScanTypeChecker() @@ -396,7 +398,7 @@ case class CometScanRule(session: SparkSession) case Some(m) => m case None => fallbackReasons += "Failed to extract Iceberg metadata via reflection" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } // Now perform all validation using the pre-extracted metadata @@ -439,7 +441,7 @@ case class CometScanRule(session: SparkSession) case e: Exception => fallbackReasons += "Iceberg reflection failure: Could not validate " + s"FileScanTasks: ${e.getMessage}" - return withInfos(scanExec, fallbackReasons.toSet) + return withFallbackReasons(scanExec, fallbackReasons.toSet) } // Check if all files are Parquet format and use supported filesystem schemes @@ -639,11 +641,11 @@ case class CometScanRule(session: SparkSession) runtimeFilters = scanExec.runtimeFilters, nativeIcebergScanMetadata = Some(metadata)) } else { - withInfos(scanExec, fallbackReasons.toSet) + withFallbackReasons(scanExec, fallbackReasons.toSet) } case other => - withInfo( + withFallbackReason( scanExec, s"Unsupported scan: ${other.getClass.getName}. " + "Comet Scan only supports Parquet and Iceberg Parquet file formats") @@ -674,7 +676,7 @@ case class CometScanRule(session: SparkSession) val schemaSupported = typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons) if (!schemaSupported) { - withInfo( + withFallbackReason( scanExec, s"Unsupported schema ${scanExec.requiredSchema}: ${fallbackReasons.mkString(", ")}") return false @@ -682,7 +684,7 @@ case class CometScanRule(session: SparkSession) val partitionSchemaSupported = typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons) if (!partitionSchemaSupported) { - withInfo( + withFallbackReason( scanExec, s"Unsupported partitioning schema ${scanExec.requiredSchema}: " + fallbackReasons.mkString(", ")) diff --git a/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala b/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala index 6a408ee745..2864eea4ed 100644 --- a/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala +++ b/spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.execution.{SortExec, SparkPlan} import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason /** * Adapted from equivalent rule in Apache Gluten. @@ -69,7 +69,7 @@ object RewriteJoin extends JoinSelectionHelper { getSmjBuildSide(smj) match { case Some(BuildRight) if smj.joinType == LeftSemi => // LeftSemi https://github.com/apache/datafusion-comet/issues/2667 - withInfo( + withFallbackReason( smj, "Cannot rewrite SortMergeJoin to HashJoin: " + s"BuildRight with ${smj.joinType} is not supported") diff --git a/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala b/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala index a13b685ea6..75e0f532e3 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometBloomFilterMightContain.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, BloomFilterMightContain} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal object CometBloomFilterMightContain extends CometExpressionSerde[BloomFilterMightContain] { @@ -45,7 +45,7 @@ object CometBloomFilterMightContain extends CometExpressionSerde[BloomFilterMigh .setBloomFilterMightContain(builder) .build()) } else { - withInfo(expr, bloomFilter, value) + withFallbackReason(expr, bloomFilter, value) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala b/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala index 852e80ae44..a1d5be84ff 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometScalaUDF.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, import org.apache.spark.sql.types.BinaryType import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.codegen.CometBatchKernelCodegen import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, serializeDataType} @@ -61,16 +61,17 @@ object CometScalaUDF extends CometExpressionSerde[ScalaUDF] { * Arrow-direct codegen dispatcher. The dispatcher will Janino-compile `expr.doGenCode` into a * batch kernel on first invocation per task. * - * Returns `None` (with `withInfo` tagging the reason) when the dispatcher is disabled via - * [[CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED]] or when [[CometBatchKernelCodegen.canHandle]] - * refuses the expression tree. Callers should treat `None` as a clean Spark-fallback signal. + * Returns `None` (with `withFallbackReason` tagging the reason) when the dispatcher is disabled + * via [[CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED]] or when + * [[CometBatchKernelCodegen.canHandle]] refuses the expression tree. Callers should treat + * `None` as a clean Spark-fallback signal. */ def emitJvmCodegenDispatch( expr: Expression, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { if (!CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED.get()) { - withInfo( + withFallbackReason( expr, s"${CometConf.COMET_SCALA_UDF_CODEGEN_ENABLED.key}=false; expression has no native " + "path so the plan falls back to Spark") @@ -82,10 +83,11 @@ object CometScalaUDF extends CometExpressionSerde[ScalaUDF] { val attrs = expr.collect { case a: AttributeReference => a }.distinct val boundExpr = BindReferences.bindReference(expr, AttributeSeq(attrs)) - // Gate at plan time. Surface the reason via withInfo rather than crashing Janino at execute. + // Gate at plan time. Surface the reason via withFallbackReason rather than crashing Janino + // at execute. CometBatchKernelCodegen.canHandle(boundExpr) match { case Some(reason) => - withInfo(expr, reason) + withFallbackReason(expr, reason) return None case None => } @@ -133,7 +135,7 @@ class CometCodegenDispatch[T <: Expression] extends CometExpressionSerde[T] { // Intentionally no getCompatibleNotes override: the docs generator emits compat notes under // a heading that promises "no additional configuration required". The dispatcher flag is a // global concern documented elsewhere; tagging each expression here would contradict the - // heading. When the flag is off, `convert` returns None with a clear withInfo reason that + // heading. When the flag is off, `convert` returns None with a clear fallback reason that // shows up in EXPLAIN, which is the right place for that signal. override def convert(expr: T, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = CometScalaUDF.emitJvmCodegenDispatch(expr, inputs, binding) diff --git a/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala b/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala index b1f5a28271..329c91f49e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometScalarSubquery.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.execution.ScalarSubquery -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{serializeDataType, supportedDataType} object CometScalarSubquery extends CometExpressionSerde[ScalarSubquery] { @@ -33,7 +33,9 @@ object CometScalarSubquery extends CometExpressionSerde[ScalarSubquery] { if (supportedDataType(expr.dataType)) { val dataType = serializeDataType(expr.dataType) if (dataType.isEmpty) { - withInfo(expr, s"Failed to serialize datatype ${expr.dataType} for scalar subquery") + withFallbackReason( + expr, + s"Failed to serialize datatype ${expr.dataType} for scalar subquery") return None } @@ -43,7 +45,7 @@ object CometScalarSubquery extends CometExpressionSerde[ScalarSubquery] { .setDatatype(dataType.get) Some(ExprOuterClass.Expr.newBuilder().setSubquery(builder).build()) } else { - withInfo(expr, s"Unsupported data type: ${expr.dataType}") + withFallbackReason(expr, s"Unsupported data type: ${expr.dataType}") None } diff --git a/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala b/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala index 3647645109..3dcd67a65d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Descending, NullsFirst, NullsLast, SortOrder} import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal object CometSortOrder extends CometExpressionSerde[SortOrder] { @@ -73,7 +73,7 @@ object CometSortOrder extends CometExpressionSerde[SortOrder] { .setSortOrder(sortOrderBuilder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index b818b61b1b..f407e5c98a 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions._ import org.apache.comet.parquet.CometParquetUtils import org.apache.comet.serde.ExprOuterClass.{AggExpr, Expr, ScalarFunc} @@ -558,7 +558,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { if (aggExpr.isDistinct && aggExpr.aggregateFunction.children.length > 1 && aggExpr.aggregateFunction.prettyName != "count") { - withInfo(aggExpr, s"Multi-column distinct aggregate not supported for: $aggExpr") + withFallbackReason(aggExpr, s"Multi-column distinct aggregate not supported for: $aggExpr") return None } @@ -569,7 +569,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { val aggHandler = handler.asInstanceOf[CometAggregateExpressionSerde[AggregateFunction]] val exprConfName = aggHandler.getExprConfigName(fn) if (!CometConf.isExprEnabled(exprConfName)) { - withInfo( + withFallbackReason( aggExpr, "Expression support is disabled. Set " + s"${CometConf.getExprEnabledConfigKey(exprConfName)}=true to enable it.") @@ -577,7 +577,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { } aggHandler.getSupportLevel(fn) match { case Unsupported(notes) => - withInfo(fn, notes.getOrElse("")) + withFallbackReason(fn, notes.getOrElse("")) None case Incompatible(notes) => val exprAllowIncompat = CometConf.isExprAllowIncompat(exprConfName) @@ -591,7 +591,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { aggHandler.convert(aggExpr, fn, inputs, binding, conf) } else { val optionalNotes = notes.map(str => s" ($str)").getOrElse("") - withInfo( + withFallbackReason( fn, s"$fn is not fully compatible with Spark$optionalNotes. To enable it anyway, " + s"set ${CometConf.getExprAllowIncompatConfigKey(exprConfName)}=true. " + @@ -605,7 +605,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { aggHandler.convert(aggExpr, fn, inputs, binding, conf) } case _ => - withInfo( + withFallbackReason( aggExpr, s"unsupported Spark aggregate function: ${fn.prettyName}", fn.children: _*) @@ -622,7 +622,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { if (aggExpr.filter.isDefined && aggExpr.mode == Partial) { val filterProto = exprToProto(aggExpr.filter.get, inputs, binding) if (filterProto.isEmpty) { - withInfo(aggExpr, aggExpr.filter.get) + withFallbackReason(aggExpr, aggExpr.filter.get) return None } builder.setFilter(filterProto.get) @@ -694,7 +694,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { def convert[T <: Expression](expr: T, handler: CometExpressionSerde[T]): Option[Expr] = { val exprConfName = handler.getExprConfigName(expr) if (!CometConf.isExprEnabled(exprConfName)) { - withInfo( + withFallbackReason( expr, "Expression support is disabled. Set " + s"${CometConf.getExprEnabledConfigKey(exprConfName)}=true to enable it.") @@ -702,7 +702,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { } handler.getSupportLevel(expr) match { case Unsupported(notes) => - withInfo(expr, notes.getOrElse("")) + withFallbackReason(expr, notes.getOrElse("")) None case Incompatible(notes) => val exprAllowIncompat = CometConf.isExprAllowIncompat(exprConfName) @@ -716,7 +716,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { handler.convert(expr, inputs, binding) } else { val optionalNotes = notes.map(str => s" ($str)").getOrElse("") - withInfo( + withFallbackReason( expr, s"$expr is not fully compatible with Spark$optionalNotes. To enable it anyway, " + s"set ${CometConf.getExprAllowIncompatConfigKey(exprConfName)}=true. " + @@ -744,7 +744,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { case Some(handler) => convert(expr, handler.asInstanceOf[CometExpressionSerde[Expression]]) case _ => - withInfo(expr, s"${expr.prettyName} is not supported", expr.children: _*) + withFallbackReason(expr, s"${expr.prettyName} is not supported", expr.children: _*) None } }) @@ -795,7 +795,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { .newBuilder(), inner).build()) } else { - withInfo(expr, child) + withFallbackReason(expr, child) None } } @@ -825,7 +825,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { .newBuilder(), inner).build()) } else { - withInfo(expr, left, right) + withFallbackReason(expr, left, right) None } } @@ -869,7 +869,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { childExpr: Expression*): Option[Expr] = { optExpr match { case None => - withInfo(expr, childExpr: _*) + withFallbackReason(expr, childExpr: _*) None case o => o } @@ -906,7 +906,9 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { case _ => supportedScalarSortElementType(sortOrder.head.dataType) } if (!canSort) { - withInfo(op, s"Sort on single column of type ${sortOrder.head.dataType} is not supported") + withFallbackReason( + op, + s"Sort on single column of type ${sortOrder.head.dataType} is not supported") false } else { true diff --git a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala index 2714a7e466..60ba5c41eb 100644 --- a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types.{ByteType, DataTypes, DecimalType, IntegerType import org.apache.comet.CometConf import org.apache.comet.CometConf.COMET_EXEC_STRICT_FLOATING_POINT -import org.apache.comet.CometSparkSessionExtensions.{isSpark41Plus, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{isSpark41Plus, withFallbackReason} import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProto, serializeDataType} import org.apache.comet.shims.CometEvalModeUtil @@ -43,14 +43,14 @@ object CometMin extends CometAggregateExpressionSerde[Min] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.minMaxDataTypeSupported(expr.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${expr.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${expr.dataType}") return None } if (expr.dataType == DataTypes.FloatType || expr.dataType == DataTypes.DoubleType) { if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get()) { // https://github.com/apache/datafusion-comet/issues/2448 - withInfo( + withFallbackReason( aggExpr, s"floating-point not supported when ${COMET_EXEC_STRICT_FLOATING_POINT.key}=true") return None @@ -72,10 +72,10 @@ object CometMin extends CometAggregateExpressionSerde[Min] { .setMin(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${expr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${expr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -92,14 +92,14 @@ object CometMax extends CometAggregateExpressionSerde[Max] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.minMaxDataTypeSupported(expr.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${expr.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${expr.dataType}") return None } if (expr.dataType == DataTypes.FloatType || expr.dataType == DataTypes.DoubleType) { if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get()) { // https://github.com/apache/datafusion-comet/issues/2448 - withInfo( + withFallbackReason( aggExpr, s"floating-point not supported when ${COMET_EXEC_STRICT_FLOATING_POINT.key}=true") return None @@ -121,10 +121,10 @@ object CometMax extends CometAggregateExpressionSerde[Max] { .setMax(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${expr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${expr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -147,7 +147,7 @@ object CometCount extends CometAggregateExpressionSerde[Count] { .setCount(builder) .build()) } else { - withInfo(aggExpr, expr.children: _*) + withFallbackReason(aggExpr, expr.children: _*) None } } @@ -166,7 +166,7 @@ object CometAverage extends CometAggregateExpressionSerde[Average] { conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.avgDataTypeSupported(avg.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${avg.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${avg.dataType}") return None } @@ -198,10 +198,10 @@ object CometAverage extends CometAggregateExpressionSerde[Average] { .setAvg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${avg.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${avg.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -219,7 +219,7 @@ object CometSum extends CometAggregateExpressionSerde[Sum] { conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.sumDataTypeSupported(sum.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${sum.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${sum.dataType}") return None } @@ -241,9 +241,9 @@ object CometSum extends CometAggregateExpressionSerde[Sum] { .build()) } else { if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${sum.dataType} is not supported", sum.child) + withFallbackReason(aggExpr, s"datatype ${sum.dataType} is not supported", sum.child) } else { - withInfo(aggExpr, sum.child) + withFallbackReason(aggExpr, sum.child) } None } @@ -277,10 +277,10 @@ object CometFirst extends CometAggregateExpressionSerde[First] { .setFirst(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${first.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${first.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -313,10 +313,10 @@ object CometLast extends CometAggregateExpressionSerde[Last] { .setLast(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${last.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${last.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -332,7 +332,7 @@ object CometBitAndAgg extends CometAggregateExpressionSerde[BitAndAgg] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.bitwiseAggTypeSupported(bitAnd.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${bitAnd.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${bitAnd.dataType}") return None } val child = bitAnd.child @@ -349,10 +349,10 @@ object CometBitAndAgg extends CometAggregateExpressionSerde[BitAndAgg] { .setBitAndAgg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${bitAnd.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${bitAnd.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -368,7 +368,7 @@ object CometBitOrAgg extends CometAggregateExpressionSerde[BitOrAgg] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.bitwiseAggTypeSupported(bitOr.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${bitOr.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${bitOr.dataType}") return None } val child = bitOr.child @@ -385,10 +385,10 @@ object CometBitOrAgg extends CometAggregateExpressionSerde[BitOrAgg] { .setBitOrAgg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${bitOr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${bitOr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -404,7 +404,7 @@ object CometBitXOrAgg extends CometAggregateExpressionSerde[BitXorAgg] { binding: Boolean, conf: SQLConf): Option[ExprOuterClass.AggExpr] = { if (!AggSerde.bitwiseAggTypeSupported(bitXor.dataType)) { - withInfo(aggExpr, s"Unsupported data type: ${bitXor.dataType}") + withFallbackReason(aggExpr, s"Unsupported data type: ${bitXor.dataType}") return None } val child = bitXor.child @@ -421,10 +421,10 @@ object CometBitXOrAgg extends CometAggregateExpressionSerde[BitXorAgg] { .setBitXorAgg(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${bitXor.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${bitXor.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -457,7 +457,7 @@ trait CometCovBase { .setCovariance(builder) .build()) } else { - withInfo(aggExpr, "Child expression or data type not supported") + withFallbackReason(aggExpr, "Child expression or data type not supported") None } } @@ -523,7 +523,7 @@ trait CometVariance { .setVariance(builder) .build()) } else { - withInfo(aggExpr, expr.child) + withFallbackReason(aggExpr, expr.child) None } } @@ -578,7 +578,7 @@ trait CometStddev { .setStddev(builder) .build()) } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } @@ -630,7 +630,7 @@ object CometCorr extends CometAggregateExpressionSerde[Corr] { .setCorrelation(builder) .build()) } else { - withInfo(aggExpr, corr.x, corr.y) + withFallbackReason(aggExpr, corr.x, corr.y) None } } @@ -698,7 +698,7 @@ object CometBloomFilterAggregate extends CometAggregateExpressionSerde[BloomFilt .setBloomFilterAgg(builder) .build()) } else { - withInfo( + withFallbackReason( aggExpr, bloomFilter.child, bloomFilter.estimatedNumItemsExpression, @@ -751,10 +751,10 @@ object CometCollectSet extends CometAggregateExpressionSerde[CollectSet] { .setCollectSet(builder) .build()) } else if (dataType.isEmpty) { - withInfo(aggExpr, s"datatype ${expr.dataType} is not supported", child) + withFallbackReason(aggExpr, s"datatype ${expr.dataType} is not supported", child) None } else { - withInfo(aggExpr, child) + withFallbackReason(aggExpr, child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala b/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala index 85574fbab7..2101a4e4aa 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala @@ -24,7 +24,7 @@ import scala.math.min import org.apache.spark.sql.catalyst.expressions.{Add, Attribute, Cast, Divide, EmptyRow, EqualTo, EvalMode, Expression, If, IntegralDivide, Literal, Multiply, Remainder, Round, Subtract, UnaryMinus} import org.apache.spark.sql.types.{ByteType, DataType, DecimalType, DoubleType, FloatType, IntegerType, LongType, ShortType} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProtoWithReturnType, serializeDataType} import org.apache.comet.shims.CometEvalModeUtil @@ -61,7 +61,7 @@ trait MathBase { .newBuilder(), inner).build()) } else { - withInfo(expr, left, right) + withFallbackReason(expr, left, right) None } } @@ -92,7 +92,7 @@ object CometAdd extends CometExpressionSerde[Add] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } createMathExpression( @@ -114,7 +114,7 @@ object CometSubtract extends CometExpressionSerde[Subtract] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } createMathExpression( @@ -136,7 +136,7 @@ object CometMultiply extends CometExpressionSerde[Multiply] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } createMathExpression( @@ -163,7 +163,7 @@ object CometDivide extends CometExpressionSerde[Divide] with MathBase { val rightExpr = if (expr.evalMode != EvalMode.ANSI) nullIfWhenPrimitive(expr.right) else expr.right if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } val divideExpr = createMathExpression( @@ -200,7 +200,7 @@ object CometIntegralDivide extends CometExpressionSerde[IntegralDivide] with Mat inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } @@ -264,11 +264,11 @@ object CometRemainder extends CometExpressionSerde[Remainder] with MathBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!supportedDataType(expr.left.dataType)) { - withInfo(expr, s"Unsupported datatype ${expr.left.dataType}") + withFallbackReason(expr, s"Unsupported datatype ${expr.left.dataType}") return None } if (expr.evalMode == EvalMode.TRY) { - withInfo(expr, s"Eval mode ${expr.evalMode} is not supported") + withFallbackReason(expr, s"Eval mode ${expr.evalMode} is not supported") return None } @@ -297,7 +297,7 @@ object CometRound extends CometExpressionSerde[Round] { lazy val childExpr = exprToProtoInternal(r.child, inputs, binding) r.child.dataType match { case t: DecimalType if t.scale < 0 => // Spark disallows negative scale SPARK-30252 - withInfo(r, "Decimal type has negative scale") + withFallbackReason(r, "Decimal type has negative scale") None case _ if scaleV == null => exprToProtoInternal(Literal(null), inputs, binding) @@ -318,7 +318,7 @@ object CometRound extends CometExpressionSerde[Round] { // I.e. 6.13171162472835E18 == 6.1317116247283497E18. However, toString() does not. // That results in round(6.1317116247283497E18, -5) == 6.1317116247282995E18 instead // of 6.1317116247283999E18. - withInfo(r, "Comet does not support Spark's BigDecimal rounding") + withFallbackReason(r, "Comet does not support Spark's BigDecimal rounding") None case _ => // `scale` must be Int64 type in DataFusion @@ -352,7 +352,7 @@ object CometUnaryMinus extends CometExpressionSerde[UnaryMinus] { .setUnaryMinus(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 5edc08840a..e9e9aba9ec 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde._ import org.apache.comet.shims.{CometExprShim, CometTypeShim} @@ -44,7 +44,7 @@ object CometArrayRemove val inputTypes: Set[DataType] = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } @@ -100,7 +100,7 @@ object CometArrayAppend extends CometExpressionSerde[ArrayAppend] { .setCaseWhen(caseWhenExpr) .build()) } else { - withInfo(expr, expr.children: _*) + withFallbackReason(expr, expr.children: _*) None } } @@ -177,7 +177,7 @@ object CometSortArray extends CometExpressionSerde[SortArray] { exprToProtoInternal(Literal(direction), inputs, binding), exprToProtoInternal(Literal(nullOrdering), inputs, binding)) case other => - withInfo(expr, s"ascendingOrder must be a boolean literal: $other") + withFallbackReason(expr, s"ascendingOrder must be a boolean literal: $other") (None, None) } @@ -361,7 +361,7 @@ object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExpr val inputTypes = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } @@ -440,7 +440,7 @@ object CometArrayInsert extends CometExpressionSerde[ArrayInsert] { .setArrayInsert(arrayInsertBuilder) .build()) } else { - withInfo( + withFallbackReason( expr, "unsupported arguments for ArrayInsert", expr.children.head, @@ -485,7 +485,7 @@ object CometCreateArray extends CometExpressionSerde[CreateArray] { if (childExprs.forall(_.isDefined)) { scalarFunctionExprToProto("make_array", childExprs: _*) } else { - withInfo(expr, "unsupported arguments for CreateArray", children: _*) + withFallbackReason(expr, "unsupported arguments for CreateArray", children: _*) None } } @@ -514,7 +514,7 @@ object CometGetArrayItem extends CometExpressionSerde[GetArrayItem] { .setListExtract(listExtractBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for GetArrayItem", expr.child, expr.ordinal) + withFallbackReason(expr, "unsupported arguments for GetArrayItem", expr.child, expr.ordinal) None } } @@ -547,7 +547,7 @@ object CometArrayReverse extends CometExpressionSerde[Reverse] with ArraysBase { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!isTypeSupported(expr.child.dataType)) { - withInfo(expr, s"child data type not supported: ${expr.child.dataType}") + withFallbackReason(expr, s"child data type not supported: ${expr.child.dataType}") return None } val reverseExprProto = exprToProto(expr.child, inputs, binding) @@ -571,7 +571,7 @@ object CometElementAt extends CometExpressionSerde[ElementAt] { val defaultExpr = expr.defaultValueOutOfBound.flatMap(exprToProtoInternal(_, inputs, binding)) if (!expr.left.dataType.isInstanceOf[ArrayType]) { - withInfo(expr, "Input is not an array") + withFallbackReason(expr, "Input is not an array") return None } @@ -592,7 +592,7 @@ object CometElementAt extends CometExpressionSerde[ElementAt] { .setListExtract(arrayExtractBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for ElementAt", expr.left, expr.right) + withFallbackReason(expr, "unsupported arguments for ElementAt", expr.left, expr.right) None } } @@ -607,7 +607,7 @@ object CometFlatten extends CometExpressionSerde[Flatten] with ArraysBase { val inputTypes = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } @@ -703,14 +703,14 @@ object CometArrayPosition extends CometExpressionSerde[ArrayPosition] with Array inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (expr.children.forall(_.foldable)) { - withInfo(expr, "all arguments are literals, falling back to Spark") + withFallbackReason(expr, "all arguments are literals, falling back to Spark") return None } // Check if input types are supported val inputTypes: Set[DataType] = expr.children.map(_.dataType).toSet for (dt <- inputTypes) { if (!isTypeSupported(dt)) { - withInfo(expr, s"data type not supported: $dt") + withFallbackReason(expr, s"data type not supported: $dt") return None } } @@ -789,7 +789,10 @@ object CometArraysZip extends CometExpressionSerde[ArraysZip] { .build()) } else { - withInfo(expr, "unsupported arguments for ArraysZip", expr.children ++ expr.names: _*) + withFallbackReason( + expr, + "unsupported arguments for ArraysZip", + expr.children ++ expr.names: _*) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/conditional.scala b/spark/src/main/scala/org/apache/comet/serde/conditional.scala index 617043524b..cd07730890 100644 --- a/spark/src/main/scala/org/apache/comet/serde/conditional.scala +++ b/spark/src/main/scala/org/apache/comet/serde/conditional.scala @@ -23,7 +23,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.sql.catalyst.expressions.{Attribute, CaseWhen, Coalesce, Expression, If, IsNotNull} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal object CometIf extends CometExpressionSerde[If] { @@ -45,7 +45,7 @@ object CometIf extends CometExpressionSerde[If] { .setIf(builder) .build()) } else { - withInfo(expr, expr.predicate, expr.trueValue, expr.falseValue) + withFallbackReason(expr, expr.predicate, expr.trueValue, expr.falseValue) None } } @@ -76,7 +76,7 @@ object CometCaseWhen extends CometExpressionSerde[CaseWhen] { if (elseValueExpr.isDefined) { builder.setElseExpr(elseValueExpr.get) } else { - withInfo(expr, expr.elseValue.get) + withFallbackReason(expr, expr.elseValue.get) return None } } @@ -86,7 +86,7 @@ object CometCaseWhen extends CometExpressionSerde[CaseWhen] { .setCaseWhen(builder) .build()) } else { - withInfo(expr, allBranches: _*) + withFallbackReason(expr, allBranches: _*) None } } @@ -116,7 +116,7 @@ object CometCoalesce extends CometExpressionSerde[Coalesce] { if (elseValueExpr.isDefined) { builder.setElseExpr(elseValueExpr.get) } else { - withInfo(expr, elseValue) + withFallbackReason(expr, elseValue) return None } Some( @@ -125,7 +125,7 @@ object CometCoalesce extends CometExpressionSerde[Coalesce] { .setCaseWhen(builder) .build()) } else { - withInfo(expr, branches.map(_._2): _*) + withFallbackReason(expr, branches.map(_._2): _*) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala b/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala index 80a2a39ef4..fbf9e4ecff 100644 --- a/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala +++ b/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, KnownFloatingPointNormalized} import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, serializeDataType} object CometKnownFloatingPointNormalized @@ -50,7 +50,7 @@ object CometKnownFloatingPointNormalized val dataType = serializeDataType(wrapped.dataType) if (dataType.isEmpty) { - withInfo(wrapped, s"Unsupported datatype ${wrapped.dataType}") + withFallbackReason(wrapped, s"Unsupported datatype ${wrapped.dataType}") return None } val ex = exprToProtoInternal(wrapped, inputs, binding) diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala index e2995274ad..58bc1f3d01 100644 --- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, import org.apache.spark.unsafe.types.UTF8String import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.CometGetDateField.CometGetDateField import org.apache.comet.serde.ExprOuterClass.Expr @@ -216,7 +216,7 @@ object CometHour extends CometExpressionSerde[Hour] { .setHour(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -258,7 +258,7 @@ object CometMinute extends CometExpressionSerde[Minute] { .setMinute(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -300,7 +300,7 @@ object CometSecond extends CometExpressionSerde[Second] { .setSecond(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -336,7 +336,7 @@ object CometUnixTimestamp extends CometExpressionSerde[UnixTimestamp] { binding: Boolean): Option[ExprOuterClass.Expr] = { if (!isSupportedInputType(expr)) { val inputType = expr.children.head.dataType - withInfo(expr, s"unix_timestamp does not support input type: $inputType") + withFallbackReason(expr, s"unix_timestamp does not support input type: $inputType") return None } @@ -355,7 +355,7 @@ object CometUnixTimestamp extends CometExpressionSerde[UnixTimestamp] { .setUnixTimestamp(builder) .build()) } else { - withInfo(expr, expr.children.head) + withFallbackReason(expr, expr.children.head) None } } @@ -587,7 +587,7 @@ object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] { .setTruncTimestamp(builder) .build()) } else { - withInfo(expr, expr.timestamp, expr.format) + withFallbackReason(expr, expr.timestamp, expr.format) None } } @@ -645,8 +645,8 @@ object CometDateFormat extends CometExpressionSerde[DateFormatClass] { "yyyy-MM-dd'T'HH:mm:ss" -> "%Y-%m-%dT%H:%M:%S") // Compatibility is decided inside `convert`: the native path covers a subset, and the codegen - // dispatcher covers everything else when enabled. Plan-time tagging happens via `withInfo` on - // the path that returns None. + // dispatcher covers everything else when enabled. Plan-time tagging happens via + // `withFallbackReason` on the path that returns None. override def getSupportLevel(expr: DateFormatClass): SupportLevel = Compatible() override def getCompatibleNotes(): Seq[String] = Seq( @@ -716,7 +716,7 @@ object CometHours extends CometExpressionSerde[Hours] { .build() } case other => - withInfo(expr, s"Hours does not support input type: $other") + withFallbackReason(expr, s"Hours does not support input type: $other") None } optExprWithInfo(optExpr, expr, expr.child) @@ -749,7 +749,7 @@ object CometDays extends CometExpressionSerde[Days] { CometCast.castToProto(expr, Some(timezone), DateType, child, CometEvalMode.LEGACY) } case other => - withInfo(expr, s"Days does not support input type: $other") + withFallbackReason(expr, s"Days does not support input type: $other") None } diff --git a/spark/src/main/scala/org/apache/comet/serde/hash.scala b/spark/src/main/scala/org/apache/comet/serde/hash.scala index a58e81b02d..baf6716ae4 100644 --- a/spark/src/main/scala/org/apache/comet/serde/hash.scala +++ b/spark/src/main/scala/org/apache/comet/serde/hash.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, Murmur3Hash, Sha1, Sha2, XxHash64} import org.apache.spark.sql.types.{ArrayType, DataType, DecimalType, IntegerType, LongType, MapType, StringType, StructType} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, isTimeType, scalarFunctionExprToProtoWithReturnType, serializeDataType, supportedDataType} object CometXxHash64 extends CometExpressionSerde[XxHash64] { @@ -79,7 +79,7 @@ object CometSha2 extends CometExpressionSerde[Sha2] { // It's possible for spark to dynamically compute the number of bits from input // expression, however DataFusion does not support that yet. if (!expr.right.foldable) { - withInfo(expr, "For Sha2, non literal numBits is not supported") + withFallbackReason(expr, "For Sha2, non literal numBits is not supported") return None } @@ -95,7 +95,7 @@ object CometSha1 extends CometExpressionSerde[Sha1] { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (!HashUtils.isSupportedType(expr)) { - withInfo(expr, s"HashUtils doesn't support dataType: ${expr.child.dataType}") + withFallbackReason(expr, s"HashUtils doesn't support dataType: ${expr.child.dataType}") return None } val childExpr = exprToProtoInternal(expr.child, inputs, binding) @@ -118,7 +118,7 @@ private object HashUtils { case d: DecimalType if d.precision > 18 => // Spark converts decimals with precision > 18 into // Java BigDecimal before hashing - withInfo(expr, s"Unsupported datatype: $dt (precision > 18)") + withFallbackReason(expr, s"Unsupported datatype: $dt (precision > 18)") false case s: StructType => s.fields.forall(f => isSupportedDataType(expr, f.dataType)) @@ -127,10 +127,10 @@ private object HashUtils { case m: MapType => isSupportedDataType(expr, m.keyType) && isSupportedDataType(expr, m.valueType) case dt if isTimeType(dt) => - withInfo(expr, s"Unsupported datatype $dt") + withFallbackReason(expr, s"Unsupported datatype $dt") false case _ if !supportedDataType(dt, allowComplex = true) => - withInfo(expr, s"Unsupported datatype $dt") + withFallbackReason(expr, s"Unsupported datatype $dt") false case _ => true diff --git a/spark/src/main/scala/org/apache/comet/serde/literals.scala b/spark/src/main/scala/org/apache/comet/serde/literals.scala index 5b03985c09..4f2a5dfa5e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/literals.scala +++ b/spark/src/main/scala/org/apache/comet/serde/literals.scala @@ -29,7 +29,7 @@ import org.apache.spark.unsafe.types.UTF8String import com.google.protobuf.ByteString -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.DataTypeSupport.isComplexType import org.apache.comet.serde.{CometExpressionSerde, Compatible, ExprOuterClass, LiteralOuterClass, SupportLevel, Unsupported} import org.apache.comet.serde.QueryPlanSerde.{isTimeType, serializeDataType, supportedDataType} @@ -101,7 +101,7 @@ object CometLiteral extends CometExpressionSerde[Literal] with Logging { exprBuilder.setListVal(listLiteralBuilder.build()) exprBuilder.setDatatype(serializeDataType(dataType).get) case dt => - withInfo(expr, s"Unexpected datatype '$dt' for literal value '$value'") + withFallbackReason(expr, s"Unexpected datatype '$dt' for literal value '$value'") return None } } @@ -117,7 +117,7 @@ object CometLiteral extends CometExpressionSerde[Literal] with Logging { .setLiteral(exprBuilder) .build()) } else { - withInfo(expr, s"Unsupported datatype $dataType") + withFallbackReason(expr, s"Unsupported datatype $dataType") None } diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index 401d14cc78..659f1e6844 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Abs, Add, Atan2, Attribute, Ceil, CheckOverflow, Expression, Floor, Hex, If, LessThanOrEqual, Literal, Log, Log10, Log2, Logarithm, Unhex} import org.apache.spark.sql.types.{DecimalType, DoubleType, NumericType} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType} object CometAtan2 extends CometExpressionSerde[Atan2] { @@ -50,7 +50,7 @@ object CometCeil extends CometExpressionSerde[Ceil] { case t: DecimalType if t.scale == 0 => // zero scale is no-op childExpr case t: DecimalType if t.scale < 0 => // Spark disallows negative scale SPARK-30252 - withInfo(expr, s"Decimal type $t has negative scale") + withFallbackReason(expr, s"Decimal type $t has negative scale") None case _ => val optExpr = @@ -70,7 +70,7 @@ object CometFloor extends CometExpressionSerde[Floor] { case t: DecimalType if t.scale == 0 => // zero scale is no-op childExpr case t: DecimalType if t.scale < 0 => // Spark disallows negative scale SPARK-30252 - withInfo(expr, s"Decimal type $t has negative scale") + withFallbackReason(expr, s"Decimal type $t has negative scale") None case _ => val optExpr = @@ -237,7 +237,7 @@ object CometCheckOverflow extends CometExpressionSerde[CheckOverflow] { .setCheckOverflow(builder) .build()) } else { - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala b/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala index aba52d3624..b778f2ea87 100644 --- a/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala +++ b/spark/src/main/scala/org/apache/comet/serde/namedExpressions.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, BindReferences, BoundReference} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, serializeDataType} object CometAlias extends CometExpressionSerde[Alias] { @@ -31,7 +31,7 @@ object CometAlias extends CometExpressionSerde[Alias] { binding: Boolean): Option[ExprOuterClass.Expr] = { val r = exprToProtoInternal(a.child, inputs, binding) if (r.isEmpty) { - withInfo(a, a.child) + withFallbackReason(a, a.child) } r } @@ -53,7 +53,7 @@ object CometAttributeReference extends CometExpressionSerde[AttributeReference] .bindReference(attr, inputs, allowFailures = true) if (boundRef.isInstanceOf[AttributeReference]) { - withInfo(attr, s"cannot resolve $attr among ${inputs.mkString(", ")}") + withFallbackReason(attr, s"cannot resolve $attr among ${inputs.mkString(", ")}") return None } @@ -82,7 +82,7 @@ object CometAttributeReference extends CometExpressionSerde[AttributeReference] .build()) } } else { - withInfo(attr, s"unsupported datatype: ${attr.dataType}") + withFallbackReason(attr, s"unsupported datatype: ${attr.dataType}") None } diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala index 69b9bd5f85..60fb65277e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometDataWritingCommand.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.internal.SQLConf import org.apache.comet.{CometConf, ConfigEntry} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.objectstore.NativeConfig import org.apache.comet.serde.{CometOperatorSerde, Incompatible, OperatorOuterClass, SupportLevel, Unsupported} import org.apache.comet.serde.OperatorOuterClass.Operator @@ -104,7 +104,7 @@ object CometDataWritingCommand extends CometOperatorSerde[DataWritingCommandExec } if (scanTypes.length != cmd.query.output.length) { - withInfo(op, "Cannot serialize data types for native write") + withFallbackReason(op, "Cannot serialize data types for native write") return None } @@ -124,7 +124,7 @@ object CometDataWritingCommand extends CometOperatorSerde[DataWritingCommandExec case "zstd" => OperatorOuterClass.CompressionCodec.Zstd case "none" => OperatorOuterClass.CompressionCodec.None case other => - withInfo(op, s"Unsupported compression codec: $other") + withFallbackReason(op, s"Unsupported compression codec: $other") return None } @@ -157,7 +157,7 @@ object CometDataWritingCommand extends CometOperatorSerde[DataWritingCommandExec Some(writerOperator) } catch { case e: Exception => - withInfo( + withFallbackReason( op, "Failed to convert DataWritingCommandExec to native execution: " + s"${e.getMessage}") diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala index 76fefa15c4..8662f5774e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.comet.{CometConf, ConfigEntry} import org.apache.comet.CometConf.COMET_EXEC_ENABLED -import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, isSpark35Plus, isSpark41Plus, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{hasFallbackReason, isSpark35Plus, isSpark41Plus, withFallbackReason} import org.apache.comet.objectstore.NativeConfig import org.apache.comet.parquet.CometParquetUtils import org.apache.comet.serde.{CometOperatorSerde, Compatible, OperatorOuterClass, SupportLevel} @@ -48,13 +48,15 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { /** Determine whether the scan is supported and tag the Spark plan with any fallback reasons */ def isSupported(scanExec: FileSourceScanExec): Boolean = { - if (hasExplainInfo(scanExec)) { + if (hasFallbackReason(scanExec)) { // this node has already been tagged with fallback reasons return false } if (!COMET_EXEC_ENABLED.get()) { - withInfo(scanExec, s"Full native scan disabled because ${COMET_EXEC_ENABLED.key} disabled") + withFallbackReason( + scanExec, + s"Full native scan disabled because ${COMET_EXEC_ENABLED.key} disabled") } // AQE DPP (SubqueryAdaptiveBroadcastExec) is converted to CometSubqueryBroadcastExec @@ -67,14 +69,14 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { // rule can't run. CometScanRule.transformV1Scan rejects AQE DPP on 3.4, so this check // is a safety net: if the scan somehow reached here with AQE DPP on 3.4, reject it. if (!isSpark35Plus && scanExec.partitionFilters.exists(isAqeDynamicPruningFilter)) { - withInfo(scanExec, "Native DataFusion scan does not support AQE DPP on Spark 3.4") + withFallbackReason(scanExec, "Native DataFusion scan does not support AQE DPP on Spark 3.4") } if (SQLConf.get.ignoreCorruptFiles || scanExec.relation.options .get("ignorecorruptfiles") // Spark sets this to lowercase. .contains("true")) { - withInfo(scanExec, "Full native scan disabled because ignoreCorruptFiles enabled") + withFallbackReason(scanExec, "Full native scan disabled because ignoreCorruptFiles enabled") } if (SQLConf.get.ignoreMissingFiles || @@ -82,11 +84,11 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { .get("ignoremissingfiles") // Spark sets this to lowercase. .contains("true")) { - withInfo(scanExec, "Full native scan disabled because ignoreMissingFiles enabled") + withFallbackReason(scanExec, "Full native scan disabled because ignoreMissingFiles enabled") } // the scan is supported if no fallback reasons were added to the node - !hasExplainInfo(scanExec) + !hasFallbackReason(scanExec) } /** Detects AQE DPP (SubqueryAdaptiveBroadcastExec), as opposed to non-AQE DPP. */ @@ -234,7 +236,7 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { } else { // There are unsupported scan type - withInfo( + withFallbackReason( scan, s"unsupported Comet operator: ${scan.nodeName}, due to unsupported data types above") None diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala index 845803d133..ff11b5d23b 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometSink.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.ConfigEntry import org.apache.comet.serde.{CometOperatorSerde, OperatorOuterClass} import org.apache.comet.serde.OperatorOuterClass.Operator @@ -53,7 +53,7 @@ abstract class CometSink[T <: SparkPlan] extends CometOperatorSerde[T] { op.output.forall(a => supportedDataType(a.dataType, allowComplex = true)) if (!supportedTypes) { - withInfo(op, "Unsupported data type") + withFallbackReason(op, "Unsupported data type") return None } @@ -80,7 +80,7 @@ abstract class CometSink[T <: SparkPlan] extends CometOperatorSerde[T] { Some(builder.setScan(scanBuilder).build()) } else { // There are unsupported scan type - withInfo( + withFallbackReason( op, s"unsupported Comet operator: ${op.nodeName}, due to unsupported data types above") None @@ -123,7 +123,7 @@ object CometExchangeSink extends CometSink[SparkPlan] { op.output.forall(a => supportedDataType(a.dataType, allowComplex = true)) if (!supportedTypes) { - withInfo(op, "Unsupported data type for shuffle direct read") + withFallbackReason(op, "Unsupported data type for shuffle direct read") return None } @@ -144,7 +144,7 @@ object CometExchangeSink extends CometSink[SparkPlan] { builder.clearChildren() Some(builder.setShuffleScan(scanBuilder).build()) } else { - withInfo(op, s"unsupported data types in ${op.nodeName} for shuffle direct read") + withFallbackReason(op, s"unsupported data types in ${op.nodeName} for shuffle direct read") None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/predicates.scala b/spark/src/main/scala/org/apache/comet/serde/predicates.scala index 2e253bc239..a8fced3abe 100644 --- a/spark/src/main/scala/org/apache/comet/serde/predicates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/predicates.scala @@ -24,7 +24,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.sql.catalyst.expressions.{And, Attribute, EqualNullSafe, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, In, InSet, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not, Or} import org.apache.spark.sql.types.BooleanType -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.QueryPlanSerde._ @@ -271,7 +271,7 @@ object ComparisonUtils { .build()) } else { val allExprs = list ++ Seq(value) - withInfo(expr, allExprs: _*) + withFallbackReason(expr, allExprs: _*) None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/statics.scala b/spark/src/main/scala/org/apache/comet/serde/statics.scala index bff64e753a..26f96ca1e0 100644 --- a/spark/src/main/scala/org/apache/comet/serde/statics.scala +++ b/spark/src/main/scala/org/apache/comet/serde/statics.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionImplUtils import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} object CometStaticInvoke extends CometExpressionSerde[StaticInvoke] { @@ -48,7 +48,7 @@ object CometStaticInvoke extends CometExpressionSerde[StaticInvoke] { case Some(handler) => handler.convert(expr, inputs, binding) case None => - withInfo( + withFallbackReason( expr, s"Static invoke expression: ${expr.functionName} is not supported", expr.children: _*) diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index aec4b19111..392f09a889 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType} import org.apache.spark.unsafe.types.UTF8String import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode, RegExp} import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType} @@ -60,7 +60,7 @@ class CometCaseConversionBase[T <: Expression](function: String) override def convert(expr: T, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { if (!CometConf.COMET_CASE_CONVERSION_ENABLED.get()) { - withInfo( + withFallbackReason( expr, "Comet is not compatible with Spark for case conversion in " + s"locale-specific cases. Set ${CometConf.COMET_CASE_CONVERSION_ENABLED.key}=true " + @@ -119,11 +119,11 @@ object CometSubstring extends CometExpressionSerde[Substring] { builder.setLen(len.asInstanceOf[Int]) Some(ExprOuterClass.Expr.newBuilder().setSubstring(builder).build()) case None => - withInfo(expr, expr.str) + withFallbackReason(expr, expr.str) None } case _ => - withInfo(expr, "Substring pos and len must be literals") + withFallbackReason(expr, "Substring pos and len must be literals") None } } @@ -162,11 +162,11 @@ object CometLeft extends CometExpressionSerde[Left] { builder.setLen(lenValue.asInstanceOf[Int]) Some(ExprOuterClass.Expr.newBuilder().setSubstring(builder).build()) case None => - withInfo(expr, expr.str) + withFallbackReason(expr, expr.str) None } case _ => - withInfo(expr, "LEFT len must be a literal") + withFallbackReason(expr, "LEFT len must be a literal") None } } @@ -204,12 +204,12 @@ object CometRight extends CometExpressionSerde[Right] { builder.setLen(lenInt) Some(ExprOuterClass.Expr.newBuilder().setSubstring(builder).build()) case None => - withInfo(expr, expr.str) + withFallbackReason(expr, expr.str) None } } case _ => - withInfo(expr, "RIGHT len must be a literal") + withFallbackReason(expr, "RIGHT len must be a literal") None } } @@ -249,7 +249,7 @@ object CometConcatWs extends CometExpressionSerde[ConcatWs] { case _ if expr.children.forall(_.foldable) => // Fall back to Spark for all-literal args so ConstantFolding can handle it - withInfo(expr, "all arguments are foldable") + withFallbackReason(expr, "all arguments are foldable") None case _ => @@ -271,7 +271,9 @@ object CometLike extends CometExpressionSerde[Like] { binding, (builder, binaryExpr) => builder.setLike(binaryExpr)) } else { - withInfo(expr, s"custom escape character ${expr.escapeChar} not supported in LIKE") + withFallbackReason( + expr, + s"custom escape character ${expr.escapeChar} not supported in LIKE") None } } @@ -287,7 +289,7 @@ object CometRLike extends CometExpressionSerde[RLike] { case Literal(pattern, DataTypes.StringType) => if (!RegExp.isSupportedPattern(pattern.toString) && !CometConf.isExprAllowIncompat("regexp")) { - withInfo( + withFallbackReason( expr, s"Regexp pattern $pattern is not compatible with Spark. " + s"Set ${CometConf.getExprAllowIncompatConfigKey("regexp")}=true " + @@ -303,7 +305,7 @@ object CometRLike extends CometExpressionSerde[RLike] { (builder, binaryExpr) => builder.setRlike(binaryExpr)) } case _ => - withInfo(expr, "Only scalar regexp patterns are supported") + withFallbackReason(expr, "Only scalar regexp patterns are supported") None } } @@ -376,7 +378,7 @@ object CometRegExpReplace extends CometExpressionSerde[RegExpReplace] { override def getSupportLevel(expr: RegExpReplace): SupportLevel = { if (!RegExp.isSupportedPattern(expr.regexp.toString) && !CometConf.isExprAllowIncompat("regexp")) { - withInfo( + withFallbackReason( expr, s"Regexp pattern ${expr.regexp} is not compatible with Spark. " + s"Set ${CometConf.getExprAllowIncompatConfigKey("regexp")}=true " + @@ -487,11 +489,11 @@ trait CommonStringExprs { if (binExpr.isDefined) { CometCast.castToProto(expr, None, DataTypes.StringType, binExpr.get, CometEvalMode.TRY) } else { - withInfo(expr, bin) + withFallbackReason(expr, bin) None } case _ => - withInfo(expr, "Comet only supports decoding with 'utf-8'.") + withFallbackReason(expr, "Comet only supports decoding with 'utf-8'.") None } } diff --git a/spark/src/main/scala/org/apache/comet/serde/structs.scala b/spark/src/main/scala/org/apache/comet/serde/structs.scala index 9ef00272ec..17708f7be1 100644 --- a/spark/src/main/scala/org/apache/comet/serde/structs.scala +++ b/spark/src/main/scala/org/apache/comet/serde/structs.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, CreateNamedStruct, import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.DataTypeSupport import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, serializeDataType} @@ -36,7 +36,7 @@ object CometCreateNamedStruct extends CometExpressionSerde[CreateNamedStruct] { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { if (expr.names.length != expr.names.distinct.length) { - withInfo(expr, "CreateNamedStruct with duplicate field names are not supported") + withFallbackReason(expr, "CreateNamedStruct with duplicate field names are not supported") return None } @@ -53,7 +53,7 @@ object CometCreateNamedStruct extends CometExpressionSerde[CreateNamedStruct] { .setCreateNamedStruct(structBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for CreateNamedStruct", expr.valExprs: _*) + withFallbackReason(expr, "unsupported arguments for CreateNamedStruct", expr.valExprs: _*) None } @@ -98,7 +98,7 @@ object CometGetArrayStructFields extends CometExpressionSerde[GetArrayStructFiel .setGetArrayStructFields(arrayStructFieldsBuilder) .build()) } else { - withInfo(expr, "unsupported arguments for GetArrayStructFields", expr.child) + withFallbackReason(expr, "unsupported arguments for GetArrayStructFields", expr.child) None } } @@ -136,7 +136,7 @@ object CometStructsToJson extends CometExpressionSerde[StructsToJson] { .setToJson(toJson) .build()) case _ => - withInfo(expr, expr.child) + withFallbackReason(expr, expr.child) None } } @@ -179,7 +179,7 @@ object CometJsonToStructs extends CometExpressionSerde[JsonToStructs] { binding: Boolean): Option[ExprOuterClass.Expr] = { if (expr.schema == null) { - withInfo(expr, "from_json requires explicit schema") + withFallbackReason(expr, "from_json requires explicit schema") return None } @@ -196,7 +196,7 @@ object CometJsonToStructs extends CometExpressionSerde[JsonToStructs] { val schemaType = expr.schema if (!isSupportedType(schemaType)) { - withInfo(expr, "from_json: Unsupported schema type") + withFallbackReason(expr, "from_json: Unsupported schema type") return None } @@ -204,13 +204,15 @@ object CometJsonToStructs extends CometExpressionSerde[JsonToStructs] { if (options.nonEmpty) { val mode = options.getOrElse("mode", "PERMISSIVE") if (mode != "PERMISSIVE") { - withInfo(expr, s"from_json: Only PERMISSIVE mode supported, got: $mode") + withFallbackReason(expr, s"from_json: Only PERMISSIVE mode supported, got: $mode") return None } val knownOptions = Set("mode") val unknownOpts = options.keySet -- knownOptions if (unknownOpts.nonEmpty) { - withInfo(expr, s"from_json: Ignoring unsupported options: ${unknownOpts.mkString(", ")}") + withFallbackReason( + expr, + s"from_json: Ignoring unsupported options: ${unknownOpts.mkString(", ")}") } } diff --git a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala index e5eeb5b848..916334179b 100644 --- a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, FromUnixTime, Literal} import org.apache.spark.sql.catalyst.util.TimestampFormatter -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} // TODO: DataFusion supports only -8334601211038 <= sec <= 8210266876799 @@ -49,7 +49,7 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] { val timeZone = exprToProtoInternal(Literal(expr.timeZoneId.orNull), inputs, binding) if (expr.format != Literal(TimestampFormatter.defaultPattern)) { - withInfo(expr, "Datetime pattern format is unsupported") + withFallbackReason(expr, "Datetime pattern format is unsupported") None } else if (secExpr.isDefined && formatExpr.isDefined) { val timestampExpr = @@ -57,7 +57,7 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] { val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*) optExprWithInfo(optExpr, expr, expr.sec, expr.format) } else { - withInfo(expr, expr.sec, expr.format) + withFallbackReason(expr, expr.sec, expr.format) None } } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala index e642bafa4f..18c5c1e2b8 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.DecimalType import com.google.common.base.Objects import org.apache.comet.{CometConf, ConfigEntry} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.serde.{AggSerde, CometOperatorSerde, Incompatible, OperatorOuterClass, SupportLevel} import org.apache.comet.serde.OperatorOuterClass.Operator import org.apache.comet.serde.QueryPlanSerde.{aggExprToProto, exprToProto, scalarFunctionExprToProto} @@ -68,7 +68,7 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { }.toArray if (winExprs.length != op.windowExpression.length) { - withInfo(op, "Unsupported window expression(s)") + withFallbackReason(op, "Unsupported window expression(s)") return None } @@ -115,14 +115,14 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { if (AggSerde.minMaxDataTypeSupported(min.dataType)) { Some(agg) } else { - withInfo(windowExpr, s"datatype ${min.dataType} is not supported", expr) + withFallbackReason(windowExpr, s"datatype ${min.dataType} is not supported", expr) None } case max: Max => if (AggSerde.minMaxDataTypeSupported(max.dataType)) { Some(agg) } else { - withInfo(windowExpr, s"datatype ${max.dataType} is not supported", expr) + withFallbackReason(windowExpr, s"datatype ${max.dataType} is not supported", expr) None } case s: Sum => @@ -130,11 +130,11 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { .isInstanceOf[DecimalType]) { Some(agg) } else { - withInfo(windowExpr, s"datatype ${s.dataType} is not supported", expr) + withFallbackReason(windowExpr, s"datatype ${s.dataType} is not supported", expr) None } case _ => - withInfo( + withFallbackReason( windowExpr, s"aggregate ${agg.aggregateFunction}" + " is not supported for window function", @@ -311,7 +311,9 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { val partitionColumnNames = partitionSpec.collect { case a: AttributeReference => a.name case other => - withInfo(op, s"Unsupported partition expression: ${other.getClass.getSimpleName}") + withFallbackReason( + op, + s"Unsupported partition expression: ${other.getClass.getSimpleName}") return false } @@ -319,7 +321,7 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { s.child match { case a: AttributeReference => a.name case other => - withInfo(op, s"Unsupported sort expression: ${other.getClass.getSimpleName}") + withFallbackReason(op, s"Unsupported sort expression: ${other.getClass.getSimpleName}") return false } } @@ -327,7 +329,7 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] { if (partitionColumnNames.zip(orderColumnNames).exists { case (partCol, orderCol) => partCol != orderCol }) { - withInfo(op, "Partitioning and sorting specifications must be the same.") + withFallbackReason(op, "Partitioning and sorting specifications must be the same.") return false } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala index 493c20f8b7..a3e75f12de 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala @@ -50,7 +50,7 @@ import com.google.common.base.Objects import org.apache.comet.{CometConf, CometExplainInfo} import org.apache.comet.CometConf.{COMET_EXEC_SHUFFLE_ENABLED, COMET_SHUFFLE_MODE} -import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, isCometShuffleManagerEnabled, withInfos} +import org.apache.comet.CometSparkSessionExtensions.{hasFallbackReason, isCometShuffleManagerEnabled, withFallbackReasons} import org.apache.comet.serde.{Compatible, OperatorOuterClass, QueryPlanSerde, SupportLevel, Unsupported} import org.apache.comet.serde.operator.CometSink import org.apache.comet.shims.{CometTypeShim, ShimCometShuffleExchangeExec} @@ -265,7 +265,8 @@ object CometShuffleExchangeExec /** * Decide which Comet shuffle path (if any) can handle this shuffle. Returns `None` if neither * native nor columnar shuffle can be used; in that case the node is tagged with the combined - * fallback reasons via `withInfos` so subsequent passes short-circuit via `hasExplainInfo`. + * fallback reasons via `withFallbackReasons` so subsequent passes short-circuit via + * `hasFallbackReason`. * * This is the single coordination point: the two path-specific predicates * (`nativeShuffleFailureReasons` / `columnarShuffleFailureReasons`) are pure - they return @@ -276,11 +277,11 @@ object CometShuffleExchangeExec // shuffle falls back to Spark and tagged it. Preserve that decision - re-deriving it against // a possibly-reshaped subtree (e.g. AQE stage-wrapping) can flip the answer and produce // inconsistent plans across passes (see #3949). - if (hasExplainInfo(s)) return None + if (hasFallbackReason(s)) return None isCometShuffleEnabledReason(s) match { case Some(reason) => - withInfos(s, Set(reason)) + withFallbackReasons(s, Set(reason)) return None case None => } @@ -291,7 +292,7 @@ object CometShuffleExchangeExec // On 3.5+ with AQE DPP, the scan converts to CometNativeScanExec and // stageContainsDPPScan won't match (it checks FileSourceScanExec). if (stageContainsDPPScan(s)) { - withInfos(s, Set("Stage contains a scan with Dynamic Partition Pruning")) + withFallbackReasons(s, Set("Stage contains a scan with Dynamic Partition Pruning")) return None } @@ -305,7 +306,7 @@ object CometShuffleExchangeExec if (!isCometPlan(s.child) && !CometConf.COMET_EXEC_SHUFFLE_CONVERT_FROM_SPARK_PLAN_ENABLED.get(s.conf)) { - withInfos( + withFallbackReasons( s, Set( s"${CometConf.COMET_EXEC_SHUFFLE_CONVERT_FROM_SPARK_PLAN_ENABLED.key} is disabled " + @@ -319,7 +320,7 @@ object CometShuffleExchangeExec } val combined = (nativeReasons ++ columnarReasons).toSet - if (combined.nonEmpty) withInfos(s, combined) + if (combined.nonEmpty) withFallbackReasons(s, combined) None } @@ -444,7 +445,7 @@ object CometShuffleExchangeExec reasons += s"unsupported range partitioning sort order: $o" // Roll up fallback reasons recorded on the sort-order expression (e.g. strict // floating-point sort) so they surface in the shuffle's explain output. - o.getTagValue(CometExplainInfo.EXTENSION_INFO).foreach(reasons ++= _) + o.getTagValue(CometExplainInfo.FALLBACK_REASONS).foreach(reasons ++= _) } } for (dt <- orderings.map(_.dataType).distinct) { diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala index 7d5398ae62..8cbf7c9189 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala @@ -51,7 +51,7 @@ import com.google.common.base.Objects import com.google.protobuf.CodedOutputStream import org.apache.comet.{CometConf, CometExecIterator, CometRuntimeException, ConfigEntry} -import org.apache.comet.CometSparkSessionExtensions.{isCometShuffleEnabled, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{isCometShuffleEnabled, withFallbackReason} import org.apache.comet.parquet.CometParquetUtils import org.apache.comet.rules.CometExecRule import org.apache.comet.serde.{CometOperatorSerde, Compatible, Incompatible, OperatorOuterClass, SupportLevel, Unsupported} @@ -823,7 +823,7 @@ object CometProjectExec extends CometOperatorSerde[ProjectExec] { .addAllProjectList(exprs.map(_.get).asJava) Some(builder.setProjection(projectBuilder).build()) } else { - withInfo(op, op.projectList: _*) + withFallbackReason(op, op.projectList: _*) None } } @@ -883,7 +883,7 @@ object CometFilterExec extends CometOperatorSerde[FilterExec] { .setPredicate(cond.get) Some(builder.setFilter(filterBuilder).build()) } else { - withInfo(op, op.condition, op.child) + withFallbackReason(op, op.condition, op.child) None } } @@ -944,7 +944,7 @@ object CometSortExec extends CometOperatorSerde[SortExec] { builder: Operator.Builder, childOp: Operator*): Option[OperatorOuterClass.Operator] = { if (!supportedSortType(op, op.sortOrder)) { - withInfo(op, "Unsupported data type in sort expressions") + withFallbackReason(op, "Unsupported data type in sort expressions") return None } @@ -956,7 +956,7 @@ object CometSortExec extends CometOperatorSerde[SortExec] { .addAllSortOrders(sortOrders.map(_.get).asJava) Some(builder.setSort(sortBuilder).build()) } else { - withInfo(op, "sort order not supported", op.sortOrder: _*) + withFallbackReason(op, "sort order not supported", op.sortOrder: _*) None } } @@ -1029,7 +1029,7 @@ object CometLocalLimitExec extends CometOperatorSerde[LocalLimitExec] { .setOffset(0) Some(builder.setLimit(limitBuilder).build()) } else { - withInfo(op, "No child operator") + withFallbackReason(op, "No child operator") None } } @@ -1090,7 +1090,7 @@ object CometGlobalLimitExec extends CometOperatorSerde[GlobalLimitExec] { Some(builder.setLimit(limitBuilder).build()) } else { - withInfo(op, "No child operator") + withFallbackReason(op, "No child operator") None } } @@ -1159,7 +1159,7 @@ object CometExpandExec extends CometOperatorSerde[ExpandExec] { .setNumExprPerProject(op.projections.head.size) Some(builder.setExpand(expandBuilder).build()) } else { - withInfo(op, allProjExprs: _*) + withFallbackReason(op, allProjExprs: _*) None } } @@ -1244,7 +1244,7 @@ object CometExplodeExec extends CometOperatorSerde[GenerateExec] { val childExprProto = exprToProto(childExpr, op.child.output) if (childExprProto.isEmpty) { - withInfo(op, childExpr) + withFallbackReason(op, childExpr) return None } @@ -1256,7 +1256,7 @@ object CometExplodeExec extends CometOperatorSerde[GenerateExec] { } if (projectExprs.exists(_.isEmpty) || childOp.isEmpty) { - withInfo(op, op.output: _*) + withFallbackReason(op, op.output: _*) return None } @@ -1422,13 +1422,15 @@ trait CometBaseAggregate { val sparkFinalMode = modes.contains(Final) && findCometPartialAgg(aggregate.child).isEmpty if (multiMode) { - withInfo(aggregate, s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") + withFallbackReason( + aggregate, + s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") return None } if (sparkFinalMode && !QueryPlanSerde.allAggsSupportMixedExecution(aggregate.aggregateExpressions)) { - withInfo( + withFallbackReason( aggregate, "Spark Final aggregate without Comet Partial requires compatible " + "intermediate buffer formats") @@ -1439,7 +1441,7 @@ trait CometBaseAggregate { // (Comet partial + Spark final with incompatible intermediate buffers) val unsafeReason = aggregate.getTagValue(CometExecRule.COMET_UNSAFE_PARTIAL) if (unsafeReason.isDefined) { - withInfo(aggregate, unsafeReason.get) + withFallbackReason(aggregate, unsafeReason.get) return None } @@ -1450,12 +1452,12 @@ trait CometBaseAggregate { val child = aggregate.child if (groupingExpressions.isEmpty && aggregateExpressions.isEmpty) { - withInfo(aggregate, "No group by or aggregation") + withFallbackReason(aggregate, "No group by or aggregation") return None } if (groupingExpressions.exists(expr => QueryPlanSerde.containsMapType(expr.dataType))) { - withInfo(aggregate, "Grouping on map-containing types is not supported") + withFallbackReason(aggregate, "Grouping on map-containing types is not supported") return None } @@ -1463,7 +1465,7 @@ trait CometBaseAggregate { // Collation-aware grouping requires collation-aware hashing/equality; Comet only // compares raw bytes, which would put rows that compare equal under the collation // into different groups. - withInfo(aggregate, "Grouping on non-default collated strings is not supported") + withFallbackReason(aggregate, "Grouping on non-default collated strings is not supported") return None } @@ -1475,7 +1477,9 @@ trait CometBaseAggregate { } if (emptyExprs.nonEmpty) { - withInfo(aggregate, s"Unsupported group expressions: ${emptyExprs.mkString(", ")}") + withFallbackReason( + aggregate, + s"Unsupported group expressions: ${emptyExprs.mkString(", ")}") return None } @@ -1501,7 +1505,7 @@ trait CometBaseAggregate { val attributes = groupingExpressions.map(_.toAttribute) ++ aggregateAttributes val resultExprs = resultExpressions.map(exprToProto(_, attributes)) if (resultExprs.exists(_.isEmpty)) { - withInfo( + withFallbackReason( aggregate, s"Unsupported result expressions found in: $resultExpressions", resultExpressions: _*) @@ -1517,7 +1521,9 @@ trait CometBaseAggregate { // - Mixed {Partial, PartialMerge} (for distinct aggregate plans) val isMixedPartialMerge = modeSet == Set(Partial, PartialMerge) if (modes.size > 1 && !isMixedPartialMerge) { - withInfo(aggregate, s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") + withFallbackReason( + aggregate, + s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}") return None } @@ -1532,7 +1538,7 @@ trait CometBaseAggregate { case Final => CometAggregateMode.Final case PartialMerge => CometAggregateMode.PartialMerge case _ => - withInfo(aggregate, s"Unsupported aggregation mode ${modes.head}") + withFallbackReason(aggregate, s"Unsupported aggregation mode ${modes.head}") return None } } @@ -1548,7 +1554,7 @@ trait CometBaseAggregate { a.aggregateFunction.isInstanceOf[Last]) } if (unsupportedAggs.nonEmpty) { - withInfo( + withFallbackReason( aggregate, "PartialMerge not supported for aggregates: " + unsupportedAggs.map(_.aggregateFunction.prettyName).mkString(", ")) @@ -1565,7 +1571,7 @@ trait CometBaseAggregate { } if (aggExprs.exists(_.isEmpty)) { - withInfo( + withFallbackReason( aggregate, "Unsupported aggregate expression(s)", aggregateExpressions ++ aggregateExpressions.map(_.aggregateFunction): _*) @@ -1581,7 +1587,7 @@ trait CometBaseAggregate { val attributes = groupingExpressions.map(_.toAttribute) ++ aggregateAttributes val resultExprs = resultExpressions.map(exprToProto(_, attributes)) if (resultExprs.exists(_.isEmpty)) { - withInfo( + withFallbackReason( aggregate, s"Unsupported result expressions found in: $resultExpressions", resultExpressions: _*) @@ -1599,7 +1605,7 @@ trait CometBaseAggregate { case PartialMerge => CometAggregateMode.PartialMerge case Final => CometAggregateMode.Final case other => - withInfo(aggregate, s"Unsupported aggregation mode $other") + withFallbackReason(aggregate, s"Unsupported aggregation mode $other") return None } } @@ -1611,7 +1617,7 @@ trait CometBaseAggregate { } else { val allChildren: Seq[Expression] = groupingExpressions ++ aggregateExpressions ++ aggregateAttributes - withInfo(aggregate, allChildren: _*) + withFallbackReason(aggregate, allChildren: _*) None } } @@ -1840,7 +1846,7 @@ trait CometHashJoin { join.isInstanceOf[ShuffledHashJoinExec]) && !(CometConf.COMET_EXEC_BROADCAST_HASH_JOIN_ENABLED.get(join.conf) && join.isInstanceOf[BroadcastHashJoinExec])) { - withInfo(join, s"Invalid hash join type ${join.nodeName}") + withFallbackReason(join, s"Invalid hash join type ${join.nodeName}") return None } @@ -1852,7 +1858,7 @@ trait CometHashJoin { val joinKeys = join.leftKeys ++ join.rightKeys if (joinKeys.exists(key => isStringCollationType(key.dataType))) { - withInfo(join, "unsupported non-default collated string join keys") + withFallbackReason(join, "unsupported non-default collated string join keys") return None } @@ -1863,7 +1869,7 @@ trait CometHashJoin { (join.leftKeys.length != 1 || join.rightKeys.length != 1 || join.joinType != LeftAnti || join.buildSide != BuildRight || join.condition.isDefined)) { - withInfo( + withFallbackReason( join, "null-aware anti-join requires single-column LeftAnti BuildRight with no condition") return None @@ -1872,7 +1878,7 @@ trait CometHashJoin { val condition = join.condition.map { cond => val condProto = exprToProto(cond, join.left.output ++ join.right.output) if (condProto.isEmpty) { - withInfo(join, cond) + withFallbackReason(join, cond) return None } condProto.get @@ -1889,7 +1895,7 @@ trait CometHashJoin { case LeftAnti => JoinType.LeftAnti case _ => // Spark doesn't support other join types - withInfo(join, s"Unsupported join type ${join.joinType}") + withFallbackReason(join, s"Unsupported join type ${join.joinType}") return None } } @@ -1912,7 +1918,7 @@ trait CometHashJoin { Some(builder.setHashJoin(joinBuilder).build()) } else { val allExprs: Seq[Expression] = joinKeys - withInfo(join, allExprs: _*) + withFallbackReason(join, allExprs: _*) None } } @@ -2200,7 +2206,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { if (join.condition.isDefined && !CometConf.COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED .get(join.conf)) { - withInfo( + withFallbackReason( join, s"${CometConf.COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED.key} is not enabled", join.condition.get) @@ -2210,7 +2216,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { val condition = join.condition.map { cond => val condProto = exprToProto(cond, join.left.output ++ join.right.output) if (condProto.isEmpty) { - withInfo(join, cond) + withFallbackReason(join, cond) return None } condProto.get @@ -2227,14 +2233,14 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { case LeftAnti => JoinType.LeftAnti case _ => // Spark doesn't support other join types - withInfo(join, s"Unsupported join type ${join.joinType}") + withFallbackReason(join, s"Unsupported join type ${join.joinType}") return None } } val joinKeys = join.leftKeys ++ join.rightKeys if (joinKeys.exists(key => isStringCollationType(key.dataType))) { - withInfo(join, "unsupported non-default collated string join keys") + withFallbackReason(join, "unsupported non-default collated string join keys") return None } @@ -2248,7 +2254,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { } if (errorMsgs.nonEmpty) { - withInfo(join, errorMsgs.mkString("\n")) + withFallbackReason(join, errorMsgs.mkString("\n")) return None } @@ -2272,7 +2278,7 @@ object CometSortMergeJoinExec extends CometOperatorSerde[SortMergeJoinExec] { Some(builder.setSortMergeJoin(joinBuilder).build()) } else { val allExprs: Seq[Expression] = joinKeys - withInfo(join, allExprs: _*) + withFallbackReason(join, allExprs: _*) None } } diff --git a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala index d3e3270700..3687e66079 100644 --- a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.Sum import org.apache.spark.sql.types.DataTypes -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 868b09de9d..2c3364719c 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, DataTypes, MapType, StringType} import org.apache.comet.{CometConf, CometExplainInfo} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} @@ -122,7 +122,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { @@ -144,8 +144,8 @@ trait CometExprShim extends CommonStringExprs { val exprProto = exprToProtoInternal(toJson, inputs, binding) if (exprProto.isEmpty) { toJson - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } exprProto case (Literal(evaluator: ParseUrlEvaluator, _), "evaluate", args) => @@ -153,8 +153,8 @@ trait CometExprShim extends CommonStringExprs { val result = exprToProtoInternal(parseUrl, inputs, binding) if (result.isEmpty) { parseUrl - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } result case _ => None @@ -163,11 +163,11 @@ trait CometExprShim extends CommonStringExprs { case ms: MapSort => val keyType = ms.dataType.asInstanceOf[MapType].keyType if (!supportedScalarSortElementType(keyType)) { - withInfo(ms, s"MapSort on map with key type $keyType is not supported") + withFallbackReason(ms, s"MapSort on map with key type $keyType is not supported") None } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && SupportLevel.containsFloatingPoint(keyType)) { - withInfo( + withFallbackReason( ms, "MapSort on floating-point key is not 100% compatible with Spark, and Comet is " + s"running with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. " + diff --git a/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala index 676cb468b4..e54f0962d0 100644 --- a/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, DataTypes, MapType, StringType, TimeType} import org.apache.comet.{CometConf, CometExplainInfo} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} @@ -132,7 +132,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { @@ -154,8 +154,8 @@ trait CometExprShim extends CommonStringExprs { val exprProto = exprToProtoInternal(toJson, inputs, binding) if (exprProto.isEmpty) { toJson - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } exprProto case (Literal(evaluator: ParseUrlEvaluator, _), "evaluate", args) => @@ -163,8 +163,8 @@ trait CometExprShim extends CommonStringExprs { val result = exprToProtoInternal(parseUrl, inputs, binding) if (result.isEmpty) { parseUrl - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } result case (Literal(parser: ToTimeParser, _), "parse", args) @@ -194,11 +194,11 @@ trait CometExprShim extends CommonStringExprs { case ms: MapSort => val keyType = ms.dataType.asInstanceOf[MapType].keyType if (!supportedScalarSortElementType(keyType)) { - withInfo(ms, s"MapSort on map with key type $keyType is not supported") + withFallbackReason(ms, s"MapSort on map with key type $keyType is not supported") None } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && SupportLevel.containsFloatingPoint(keyType)) { - withInfo( + withFallbackReason( ms, "MapSort on floating-point key is not 100% compatible with Spark, and Comet is " + s"running with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. " + diff --git a/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala index 676cb468b4..e54f0962d0 100644 --- a/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, DataTypes, MapType, StringType, TimeType} import org.apache.comet.{CometConf, CometExplainInfo} -import org.apache.comet.CometSparkSessionExtensions.withInfo +import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} @@ -132,7 +132,7 @@ trait CometExprShim extends CommonStringExprs { .setToPrettyString(toPrettyString) .build()) case _ => - withInfo(expr, child) + withFallbackReason(expr, child) None } } else { @@ -154,8 +154,8 @@ trait CometExprShim extends CommonStringExprs { val exprProto = exprToProtoInternal(toJson, inputs, binding) if (exprProto.isEmpty) { toJson - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } exprProto case (Literal(evaluator: ParseUrlEvaluator, _), "evaluate", args) => @@ -163,8 +163,8 @@ trait CometExprShim extends CommonStringExprs { val result = exprToProtoInternal(parseUrl, inputs, binding) if (result.isEmpty) { parseUrl - .getTagValue(CometExplainInfo.EXTENSION_INFO) - .foreach(reasons => i.setTagValue(CometExplainInfo.EXTENSION_INFO, reasons)) + .getTagValue(CometExplainInfo.FALLBACK_REASONS) + .foreach(reasons => i.setTagValue(CometExplainInfo.FALLBACK_REASONS, reasons)) } result case (Literal(parser: ToTimeParser, _), "parse", args) @@ -194,11 +194,11 @@ trait CometExprShim extends CommonStringExprs { case ms: MapSort => val keyType = ms.dataType.asInstanceOf[MapType].keyType if (!supportedScalarSortElementType(keyType)) { - withInfo(ms, s"MapSort on map with key type $keyType is not supported") + withFallbackReason(ms, s"MapSort on map with key type $keyType is not supported") None } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && SupportLevel.containsFloatingPoint(keyType)) { - withInfo( + withFallbackReason( ms, "MapSort on floating-point key is not 100% compatible with Spark, and Comet is " + s"running with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. " + diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index a172538f45..4f39d46e30 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -938,7 +938,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - test("withInfo") { + test("withFallbackReason") { val table = "with_info" withTable(table) { sql(s"create table $table(id int, name varchar(20)) using parquet") @@ -947,14 +947,14 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { val (_, cometPlan) = checkSparkAnswerAndOperator(query) val project = stripAQEPlan(cometPlan).collectFirst { case p: CometProjectExec => p }.get val id = project.expressions.head - CometSparkSessionExtensions.withInfo(id, "reason 1") - CometSparkSessionExtensions.withInfo(project, "reason 2") - CometSparkSessionExtensions.withInfo(project, "reason 3", id) - CometSparkSessionExtensions.withInfo(project, id) - CometSparkSessionExtensions.withInfo(project, "reason 4") - CometSparkSessionExtensions.withInfo(project, "reason 5", id) - CometSparkSessionExtensions.withInfo(project, id) - CometSparkSessionExtensions.withInfo(project, "reason 6") + CometSparkSessionExtensions.withFallbackReason(id, "reason 1") + CometSparkSessionExtensions.withFallbackReason(project, "reason 2") + CometSparkSessionExtensions.withFallbackReason(project, "reason 3", id) + CometSparkSessionExtensions.withFallbackReason(project, id) + CometSparkSessionExtensions.withFallbackReason(project, "reason 4") + CometSparkSessionExtensions.withFallbackReason(project, "reason 5", id) + CometSparkSessionExtensions.withFallbackReason(project, id) + CometSparkSessionExtensions.withFallbackReason(project, "reason 6") val explain = new ExtendedExplainInfo().generateExtendedInfo(project) for (i <- 1 until 7) { assert(explain.contains(s"reason $i")) diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala index 56a1b44070..f672ebc082 100644 --- a/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/comet/CometDppFallbackRepro3949Suite.scala @@ -209,7 +209,7 @@ class CometDppFallbackRepro3949Suite extends CometTestBase { def walk(p: SparkPlan): Unit = { p match { case s: CometShuffleExchangeExec => - val tags = s.getTagValue(CometExplainInfo.EXTENSION_INFO).getOrElse(Set.empty[String]) + val tags = s.getTagValue(CometExplainInfo.FALLBACK_REASONS).getOrElse(Set.empty[String]) if (tags.exists(_.contains("Dynamic Partition Pruning"))) acc += s case _ => } diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala index b671e04042..23f5ad5d41 100644 --- a/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/comet/CometShuffleFallbackStickinessSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.comet.CometConf -import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, withInfo} +import org.apache.comet.CometSparkSessionExtensions.{hasFallbackReason, withFallbackReason} /** * Pins the sticky-fallback invariant for Comet shuffle decisions: `shuffleSupported` must return @@ -43,14 +43,14 @@ import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, withInfo} * at initial planning and then convert to Comet at stage prep, producing plan-shape * inconsistencies across the two passes (suspected mechanism behind #3949). * - * The coordinator tags the node with `withInfos` only on total fallback and short-circuits via - * `hasExplainInfo` on subsequent passes. + * The coordinator tags the node with `withFallbackReasons` only on total fallback and + * short-circuits via `hasFallbackReason` on subsequent passes. */ class CometShuffleFallbackStickinessSuite extends CometTestBase { test("shuffleSupported returns None when the shuffle already carries explain info") { val shuffle = ShuffleExchangeExec(SinglePartition, SyntheticLeaf(Nil)) - withInfo(shuffle, "pretend prior pass decided Spark fallback") + withFallbackReason(shuffle, "pretend prior pass decided Spark fallback") assert( CometShuffleExchangeExec.shuffleSupported(shuffle).isEmpty, @@ -109,7 +109,7 @@ class CometShuffleFallbackStickinessSuite extends CometTestBase { // Pass 1: real DPP subtree visible. Returns None AND tags the shuffle. val first = CometShuffleExchangeExec.shuffleSupported(shuffle) assert(first.isEmpty, "initial pass must fall back (DPP visible)") - assert(hasExplainInfo(shuffle), "fallback reason must be tagged on the shuffle") + assert(hasFallbackReason(shuffle), "fallback reason must be tagged on the shuffle") // Pass 2 simulates AQE stage-prep: replace the child with an opaque leaf that hides // the DPP subtree from tree walks. A naive `.exists`-based check would flip to "convert" diff --git a/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala b/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala index 4623f4591e..2c8451fc36 100644 --- a/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala +++ b/spark/src/test/spark-4.0/org/apache/spark/sql/CometCollationSuite.scala @@ -110,7 +110,7 @@ class CometCollationSuite extends CometTestBase { } private def assertFallbackReason(plan: SparkPlan, expectedReason: String): Unit = { - val reasons = plan.getTagValue(CometExplainInfo.EXTENSION_INFO).getOrElse(Set.empty[String]) + val reasons = plan.getTagValue(CometExplainInfo.FALLBACK_REASONS).getOrElse(Set.empty[String]) assert( reasons.contains(expectedReason), s"Expected fallback reason '$expectedReason' on ${plan.nodeName}, got: $reasons") From 6e37f6fcf849fbcbc0df5e61aa9fde3e04456745 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 29 May 2026 17:26:31 -0600 Subject: [PATCH 2/2] refactor: rename optExprWithInfo to optExprWithFallbackReason Apply rename consistently across the serde layer per review feedback. --- .../comet/serde/CometScalarFunction.scala | 4 +-- .../apache/comet/serde/QueryPlanSerde.scala | 4 +-- .../org/apache/comet/serde/arithmetic.scala | 4 +-- .../scala/org/apache/comet/serde/arrays.scala | 28 +++++++++---------- .../org/apache/comet/serde/bitwise.scala | 4 +-- .../comet/serde/contraintExpressions.scala | 4 +-- .../org/apache/comet/serde/datetime.scala | 20 ++++++------- .../comet/serde/decimalExpressions.scala | 6 ++-- .../scala/org/apache/comet/serde/maps.scala | 12 ++++---- .../scala/org/apache/comet/serde/math.scala | 22 +++++++-------- .../org/apache/comet/serde/predicates.scala | 2 +- .../org/apache/comet/serde/statics.scala | 6 ++-- .../org/apache/comet/serde/strings.scala | 12 ++++---- .../org/apache/comet/serde/unixtime.scala | 4 +-- .../scala/org/apache/comet/serde/url.scala | 4 +-- .../apache/comet/shims/CometExprShim.scala | 4 +-- .../apache/comet/shims/CometExprShim.scala | 8 +++--- .../apache/comet/shims/CometExprShim.scala | 14 +++++----- .../apache/comet/shims/CometExprShim.scala | 14 +++++----- 19 files changed, 88 insertions(+), 88 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala b/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala index aa3bf775fb..2e056ae886 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala +++ b/spark/src/main/scala/org/apache/comet/serde/CometScalarFunction.scala @@ -22,13 +22,13 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.comet.serde.ExprOuterClass.Expr -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} /** Serde for scalar function. */ case class CometScalarFunction[T <: Expression](name: String) extends CometExpressionSerde[T] { override def convert(expr: T, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { val childExpr = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto(name, childExpr: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index c00e9145fc..0bdc02a790 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -864,8 +864,8 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { Some(ExprOuterClass.Expr.newBuilder().setScalarFunc(builder).build()) } - // Utility method. Adds explain info if the result of calling exprToProto is None - def optExprWithInfo( + // Utility method. Adds fallback reason if the result of calling exprToProto is None + def optExprWithFallbackReason( optExpr: Option[Expr], expr: Expression, childExpr: Expression*): Option[Expr] = { diff --git a/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala b/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala index 2101a4e4aa..58e99f9c79 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arithmetic.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.types.{ByteType, DataType, DecimalType, DoubleType, import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} -import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProtoWithReturnType, serializeDataType} +import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProtoWithReturnType, serializeDataType} import org.apache.comet.shims.CometEvalModeUtil trait MathBase { @@ -330,7 +330,7 @@ object CometRound extends CometExpressionSerde[Round] { r.ansiEnabled, childExpr, scaleExpr) - optExprWithInfo(optExpr, r, r.child) + optExprWithFallbackReason(optExpr, r, r.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 32d9b8e983..b3ea8d7c4f 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -187,7 +187,7 @@ object CometSortArray extends CometExpressionSerde[SortArray] { arrayExprProto, sortDirectionExprProto, nullOrderingExprProto) - optExprWithInfo(sortArrayScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(sortArrayScalarExpr, expr, expr.children: _*) } } @@ -221,7 +221,7 @@ object CometArrayIntersect extends CometExpressionSerde[ArrayIntersect] with Com val arraysIntersectScalarExpr = scalarFunctionExprToProto("array_intersect", leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arraysIntersectScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arraysIntersectScalarExpr, expr, expr.children: _*) } } @@ -234,7 +234,7 @@ object CometArrayMax extends CometExpressionSerde[ArrayMax] { val arrayMaxScalarExpr = scalarFunctionExprToProto("array_max", arrayExprProto) - optExprWithInfo(arrayMaxScalarExpr, expr) + optExprWithFallbackReason(arrayMaxScalarExpr, expr) } } @@ -246,7 +246,7 @@ object CometArrayMin extends CometExpressionSerde[ArrayMin] { val arrayExprProto = exprToProto(expr.children.head, inputs, binding) val arrayMinScalarExpr = scalarFunctionExprToProto("array_min", arrayExprProto) - optExprWithInfo(arrayMinScalarExpr, expr) + optExprWithFallbackReason(arrayMinScalarExpr, expr) } } @@ -264,7 +264,7 @@ object CometArraysOverlap extends CometExpressionSerde[ArraysOverlap] { false, leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arraysOverlapScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arraysOverlapScalarExpr, expr, expr.children: _*) } } @@ -328,7 +328,7 @@ object CometArrayCompact extends CometExpressionSerde[Expression] { ArrayType(elementType = elementType), false, arrayExprProto) - optExprWithInfo(arrayCompactScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arrayCompactScalarExpr, expr, expr.children: _*) } } @@ -372,7 +372,7 @@ object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExpr val arrayExceptScalarExpr = scalarFunctionExprToProto("array_except", leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arrayExceptScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arrayExceptScalarExpr, expr, expr.children: _*) } } @@ -402,7 +402,7 @@ object CometArrayJoin extends CometExpressionSerde[ArrayJoin] { delimiterExprProto, nullReplacementExprProto) - optExprWithInfo( + optExprWithFallbackReason( arrayJoinScalarExpr, expr, arrayExpr, @@ -412,7 +412,7 @@ object CometArrayJoin extends CometExpressionSerde[ArrayJoin] { val arrayJoinScalarExpr = scalarFunctionExprToProto("array_to_string", arrayExprProto, delimiterExprProto) - optExprWithInfo(arrayJoinScalarExpr, expr, arrayExpr, arrayExpr.delimiter) + optExprWithFallbackReason(arrayJoinScalarExpr, expr, arrayExpr, arrayExpr.delimiter) } } } @@ -475,7 +475,7 @@ object CometSlice extends CometExpressionSerde[Slice] { arrayExprProto, startExprProto, lengthExprProto) - optExprWithInfo(sliceScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(sliceScalarExpr, expr, expr.children: _*) } } @@ -489,7 +489,7 @@ object CometArrayUnion extends CometExpressionSerde[ArrayUnion] { val arraysUnionScalarExpr = scalarFunctionExprToProto("array_union", leftArrayExprProto, rightArrayExprProto) - optExprWithInfo(arraysUnionScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(arraysUnionScalarExpr, expr, expr.children: _*) } } @@ -580,7 +580,7 @@ object CometArrayReverse extends CometExpressionSerde[Reverse] with ArraysBase { } val reverseExprProto = exprToProto(expr.child, inputs, binding) val reverseScalarExpr = scalarFunctionExprToProto("array_reverse", reverseExprProto) - optExprWithInfo(reverseScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(reverseScalarExpr, expr, expr.children: _*) } } @@ -641,7 +641,7 @@ object CometFlatten extends CometExpressionSerde[Flatten] with ArraysBase { } val flattenExprProto = exprToProto(expr.child, inputs, binding) val flattenScalarExpr = scalarFunctionExprToProto("flatten", flattenExprProto) - optExprWithInfo(flattenScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(flattenScalarExpr, expr, expr.children: _*) } } @@ -750,7 +750,7 @@ object CometArrayPosition extends CometExpressionSerde[ArrayPosition] with Array // (matching Spark's behavior) val optExpr = scalarFunctionExprToProto("spark_array_position", arrayExprProto, elementExprProto) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/bitwise.scala b/spark/src/main/scala/org/apache/comet/serde/bitwise.scala index 751fb7521f..7c05dc4349 100644 --- a/spark/src/main/scala/org/apache/comet/serde/bitwise.scala +++ b/spark/src/main/scala/org/apache/comet/serde/bitwise.scala @@ -47,7 +47,7 @@ object CometBitwiseNot extends CometExpressionSerde[BitwiseNot] { val childProto = exprToProto(expr.child, inputs, binding) val bitNotScalarExpr = scalarFunctionExprToProto("bitwise_not", childProto) - optExprWithInfo(bitNotScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(bitNotScalarExpr, expr, expr.children: _*) } } @@ -136,7 +136,7 @@ object CometBitwiseGet extends CometExpressionSerde[BitwiseGet] { val posProto = exprToProto(expr.right, inputs, binding) val bitGetScalarExpr = scalarFunctionExprToProtoWithReturnType("bit_get", ByteType, false, argProto, posProto) - optExprWithInfo(bitGetScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(bitGetScalarExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala b/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala index fbf9e4ecff..5ffcae2b2d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala +++ b/spark/src/main/scala/org/apache/comet/serde/contraintExpressions.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, KnownFloatingPointN import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero import org.apache.comet.CometSparkSessionExtensions.withFallbackReason -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, serializeDataType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, serializeDataType} object CometKnownFloatingPointNormalized extends CometExpressionSerde[KnownFloatingPointNormalized] { @@ -61,6 +61,6 @@ object CometKnownFloatingPointNormalized .setDatatype(dataType.get) ExprOuterClass.Expr.newBuilder().setNormalizeNanAndZero(builder).build() } - optExprWithInfo(optExpr, expr, wrapped) + optExprWithFallbackReason(optExpr, expr, wrapped) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala index 35a0e19b47..9afb4cc07e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala @@ -77,7 +77,7 @@ trait CometExprGetDateField[T <: GetDateField] { .build()) .build() }) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -135,7 +135,7 @@ object CometDayOfWeek .build() } .headOption - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -388,7 +388,7 @@ object CometFromUTCTimestamp extends CometExpressionSerde[FromUTCTimestamp] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("from_utc_timestamp", childExprs: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } @@ -406,7 +406,7 @@ object CometToUTCTimestamp extends CometExpressionSerde[ToUTCTimestamp] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("to_utc_timestamp", childExprs: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } @@ -427,7 +427,7 @@ object CometConvertTimezone extends CometExpressionSerde[ConvertTimezone] { val ts = exprToProtoInternal(expr.sourceTs, inputs, binding) val toUtc = scalarFunctionExprToProto("to_utc_timestamp", ts, srcTz) val fromUtc = scalarFunctionExprToProto("from_utc_timestamp", toUtc, tgtTz) - optExprWithInfo(fromUtc, expr, expr.children: _*) + optExprWithFallbackReason(fromUtc, expr, expr.children: _*) } } @@ -473,7 +473,7 @@ object CometUnixDate extends CometExpressionSerde[UnixDate] { .build()) .build() } - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -515,7 +515,7 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] { false, childExpr, formatExpr) - optExprWithInfo(optExpr, expr, expr.date, expr.format) + optExprWithFallbackReason(optExpr, expr, expr.date, expr.format) } } @@ -682,7 +682,7 @@ object CometDateFormat extends CometExpressionSerde[DateFormatClass] { false, childExpr, formatExpr) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } else { // Hand the full `DateFormatClass` (with `timeZoneId` already stamped by `ResolveTimeZone`) // to the codegen dispatcher. It closure-serializes the bound tree, so non-UTC timezones @@ -719,7 +719,7 @@ object CometHours extends CometExpressionSerde[Hours] { withFallbackReason(expr, s"Hours does not support input type: $other") None } - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -768,7 +768,7 @@ object CometDays extends CometExpressionSerde[Days] { .build() } - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala b/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala index 52adf950c1..f41e0fafaa 100644 --- a/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala +++ b/spark/src/main/scala/org/apache/comet/serde/decimalExpressions.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, MakeDecimal, UnscaledValue} import org.apache.spark.sql.types.{DecimalType, LongType} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProtoWithReturnType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProtoWithReturnType} object CometUnscaledValue extends CometExpressionSerde[UnscaledValue] { override def convert( @@ -32,7 +32,7 @@ object CometUnscaledValue extends CometExpressionSerde[UnscaledValue] { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("unscaled_value", LongType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -58,7 +58,7 @@ object CometMakeDecimal extends CometExpressionSerde[MakeDecimal] { DecimalType(expr.precision, expr.scale), failOnError = !expr.nullOnOverflow, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/maps.scala b/spark/src/main/scala/org/apache/comet/serde/maps.scala index 01c100b5e7..abecbaa16d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/maps.scala +++ b/spark/src/main/scala/org/apache/comet/serde/maps.scala @@ -22,7 +22,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ -import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} object CometMapKeys extends CometExpressionSerde[MapKeys] { @@ -32,7 +32,7 @@ object CometMapKeys extends CometExpressionSerde[MapKeys] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val mapKeysScalarExpr = scalarFunctionExprToProto("map_keys", childExpr) - optExprWithInfo(mapKeysScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapKeysScalarExpr, expr, expr.children: _*) } } @@ -44,7 +44,7 @@ object CometMapEntries extends CometExpressionSerde[MapEntries] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val mapEntriesScalarExpr = scalarFunctionExprToProto("map_entries", childExpr) - optExprWithInfo(mapEntriesScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapEntriesScalarExpr, expr, expr.children: _*) } } @@ -56,7 +56,7 @@ object CometMapValues extends CometExpressionSerde[MapValues] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val mapValuesScalarExpr = scalarFunctionExprToProto("map_values", childExpr) - optExprWithInfo(mapValuesScalarExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapValuesScalarExpr, expr, expr.children: _*) } } @@ -69,7 +69,7 @@ object CometMapExtract extends CometExpressionSerde[GetMapValue] { val mapExpr = exprToProtoInternal(expr.child, inputs, binding) val keyExpr = exprToProtoInternal(expr.key, inputs, binding) val mapExtractExpr = scalarFunctionExprToProto("map_extract", mapExpr, keyExpr) - optExprWithInfo(mapExtractExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapExtractExpr, expr, expr.children: _*) } } @@ -129,7 +129,7 @@ object CometMapContainsKey extends CometExpressionSerde[MapContainsKey] { val mapKeysExpr = scalarFunctionExprToProto("map_keys", mapExpr) val mapContainsKeyExpr = scalarFunctionExprToProto("array_has", mapKeysExpr, keyExpr) - optExprWithInfo(mapContainsKeyExpr, expr, expr.children: _*) + optExprWithFallbackReason(mapContainsKeyExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index 659f1e6844..3be98604c3 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.{Abs, Add, Atan2, Attribute, Ce import org.apache.spark.sql.types.{DecimalType, DoubleType, NumericType} import org.apache.comet.CometSparkSessionExtensions.withFallbackReason -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType} object CometAtan2 extends CometExpressionSerde[Atan2] { override def convert( @@ -36,7 +36,7 @@ object CometAtan2 extends CometExpressionSerde[Atan2] { val leftExpr = exprToProtoInternal(left, inputs, binding) val rightExpr = exprToProtoInternal(right, inputs, binding) val optExpr = scalarFunctionExprToProto("atan2", leftExpr, rightExpr) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } } @@ -55,7 +55,7 @@ object CometCeil extends CometExpressionSerde[Ceil] { case _ => val optExpr = scalarFunctionExprToProtoWithReturnType("ceil", expr.dataType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } } @@ -75,7 +75,7 @@ object CometFloor extends CometExpressionSerde[Floor] { case _ => val optExpr = scalarFunctionExprToProtoWithReturnType("floor", expr.dataType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } } @@ -90,7 +90,7 @@ object CometLog extends CometExpressionSerde[Log] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(nullIfNegative(expr.child), inputs, binding) val optExpr = scalarFunctionExprToProto("ln", childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -101,7 +101,7 @@ object CometLog10 extends CometExpressionSerde[Log10] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(nullIfNegative(expr.child), inputs, binding) val optExpr = scalarFunctionExprToProto("log10", childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -112,7 +112,7 @@ object CometLog2 extends CometExpressionSerde[Log2] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(nullIfNegative(expr.child), inputs, binding) val optExpr = scalarFunctionExprToProto("log2", childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -128,7 +128,7 @@ object CometLogarithm extends CometExpressionSerde[Logarithm] { val rightExpr = exprToProtoInternal(expr.right, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("spark_log", DoubleType, false, leftExpr, rightExpr) - optExprWithInfo(optExpr, expr, expr.left, expr.right) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) } } @@ -139,7 +139,7 @@ object CometHex extends CometExpressionSerde[Hex] with MathExprBase { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("hex", expr.dataType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -158,7 +158,7 @@ object CometUnhex extends CometExpressionSerde[Unhex] with MathExprBase { false, childExpr, failOnErrorExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } @@ -192,7 +192,7 @@ object CometAbs extends CometExpressionSerde[Abs] with MathExprBase { false, childExpr, failOnErrorExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/predicates.scala b/spark/src/main/scala/org/apache/comet/serde/predicates.scala index a8fced3abe..7abe40823e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/predicates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/predicates.scala @@ -220,7 +220,7 @@ object CometIsNaN extends CometExpressionSerde[IsNaN] { val childExpr = exprToProtoInternal(expr.child, inputs, binding) val optExpr = scalarFunctionExprToProtoWithReturnType("isnan", BooleanType, false, childExpr) - optExprWithInfo(optExpr, expr, expr.child) + optExprWithFallbackReason(optExpr, expr, expr.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/statics.scala b/spark/src/main/scala/org/apache/comet/serde/statics.scala index 26f96ca1e0..b2a4b991d1 100644 --- a/spark/src/main/scala/org/apache/comet/serde/statics.scala +++ b/spark/src/main/scala/org/apache/comet/serde/statics.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils import org.apache.comet.CometSparkSessionExtensions.withFallbackReason -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} object CometStaticInvoke extends CometExpressionSerde[StaticInvoke] { @@ -64,7 +64,7 @@ object CometUrlEncodeStaticInvoke extends CometExpressionSerde[StaticInvoke] { binding: Boolean): Option[ExprOuterClass.Expr] = { val childExpr = exprToProtoInternal(expr.children.head, inputs, binding) val optExpr = scalarFunctionExprToProto("url_encode", childExpr) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } @@ -80,6 +80,6 @@ object CometUrlDecodeStaticInvoke extends CometExpressionSerde[StaticInvoke] { val funcName = if (failOnError) "url_decode" else "try_url_decode" val childExpr = exprToProtoInternal(expr.children.head, inputs, binding) val optExpr = scalarFunctionExprToProto(funcName, childExpr) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 08eb03982d..f2f10d5f1c 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -29,7 +29,7 @@ import org.apache.comet.CometConf import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode, RegExp} import org.apache.comet.serde.ExprOuterClass.Expr -import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType} +import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType} object CometStringRepeat extends CometExpressionSerde[StringRepeat] { @@ -47,7 +47,7 @@ object CometStringRepeat extends CometExpressionSerde[StringRepeat] { val leftExpr = exprToProtoInternal(leftCast, inputs, binding) val rightExpr = exprToProtoInternal(rightCast, inputs, binding) val optExpr = scalarFunctionExprToProto("repeat", leftExpr, rightExpr) - optExprWithInfo(optExpr, expr, leftCast, rightCast) + optExprWithFallbackReason(optExpr, expr, leftCast, rightCast) } } @@ -140,7 +140,7 @@ object CometSubstringIndex extends CometExpressionSerde[SubstringIndex] { val countExpr = exprToProtoInternal(countCast, inputs, binding) val optExpr = scalarFunctionExprToProto("substring_index", strExpr, delimExpr, countExpr) - optExprWithInfo(optExpr, expr, expr.strExpr, expr.delimExpr, expr.countExpr) + optExprWithFallbackReason(optExpr, expr, expr.strExpr, expr.delimExpr, expr.countExpr) } } @@ -407,7 +407,7 @@ object CometRegExpReplace extends CometExpressionSerde[RegExpReplace] { patternExpr, replacementExpr, flagsExpr) - optExprWithInfo(optExpr, expr, expr.subject, expr.regexp, expr.rep, expr.pos) + optExprWithFallbackReason(optExpr, expr, expr.subject, expr.regexp, expr.rep, expr.pos) } } @@ -438,7 +438,7 @@ object CometStringSplit extends CometExpressionSerde[StringSplit] { strExpr, regexExpr, limitExpr) - optExprWithInfo(optExpr, expr, expr.str, expr.regex, expr.limit) + optExprWithFallbackReason(optExpr, expr, expr.str, expr.regex, expr.limit) } } @@ -465,7 +465,7 @@ object CometGetJsonObject extends CometExpressionSerde[GetJsonObject] { false, jsonExpr, pathExpr) - optExprWithInfo(optExpr, expr, expr.json, expr.path) + optExprWithFallbackReason(optExpr, expr, expr.json, expr.path) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala index 916334179b..07c748219a 100644 --- a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, FromUnixTime, Liter import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.comet.CometSparkSessionExtensions.withFallbackReason -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} // TODO: DataFusion supports only -8334601211038 <= sec <= 8210266876799 // https://github.com/apache/datafusion/issues/16594 @@ -55,7 +55,7 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] { val timestampExpr = scalarFunctionExprToProto("from_unixtime", Seq(secExpr, timeZone): _*) val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*) - optExprWithInfo(optExpr, expr, expr.sec, expr.format) + optExprWithFallbackReason(optExpr, expr, expr.sec, expr.format) } else { withFallbackReason(expr, expr.sec, expr.format) None diff --git a/spark/src/main/scala/org/apache/comet/serde/url.scala b/spark/src/main/scala/org/apache/comet/serde/url.scala index b672cc17bb..b54d78ba25 100644 --- a/spark/src/main/scala/org/apache/comet/serde/url.scala +++ b/spark/src/main/scala/org/apache/comet/serde/url.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import org.apache.spark.sql.catalyst.expressions.{Attribute, ParseUrl} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} object CometParseUrl extends CometExpressionSerde[ParseUrl] { @@ -32,6 +32,6 @@ object CometParseUrl extends CometExpressionSerde[ParseUrl] { val funcName = if (expr.failOnError) "parse_url" else "try_parse_url" val childExprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto(funcName, childExprs: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) + optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala index 3687e66079..1444f3e669 100644 --- a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala @@ -27,7 +27,7 @@ import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -85,7 +85,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) case _ => None } diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 2c3364719c..85a8e9b292 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -33,7 +33,7 @@ import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { returnType, false, arrayExprProto) - optExprWithInfo(scalarExpr, knc, arrayChild) + optExprWithFallbackReason(scalarExpr, knc, arrayChild) case _ => exprToProtoInternal(knc.child, inputs, binding) } case _ => exprToProtoInternal(knc.child, inputs, binding) @@ -132,7 +132,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) // In Spark 4.x, RuntimeReplaceable expressions (StructsToJson, ParseUrl) become // Invoke(Literal(Evaluator), "evaluate", ...). Reconstruct the original expression @@ -180,7 +180,7 @@ trait CometExprShim extends CommonStringExprs { ms.dataType, failOnError = false, childExpr) - optExprWithInfo(mapSortExpr, ms, ms.child) + optExprWithFallbackReason(mapSortExpr, ms, ms.child) } case _ => None diff --git a/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala index e54f0962d0..1e31360eb0 100644 --- a/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala @@ -34,7 +34,7 @@ import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { returnType, false, arrayExprProto) - optExprWithInfo(scalarExpr, knc, arrayChild) + optExprWithFallbackReason(scalarExpr, knc, arrayChild) case _ => exprToProtoInternal(knc.child, inputs, binding) } case _ => exprToProtoInternal(knc.child, inputs, binding) @@ -102,7 +102,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = s.arguments.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("make_time", s.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, expr, s.arguments: _*) + optExprWithFallbackReason(optExpr, expr, s.arguments: _*) case expr @ ToPrettyString(child, timeZoneId) => val castSupported = CometCast.isSupported( @@ -142,7 +142,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) // In Spark 4.x, RuntimeReplaceable expressions (StructsToJson, ParseUrl) become // Invoke(Literal(Evaluator), "evaluate", ...). Reconstruct the original expression @@ -172,7 +172,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = args.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("to_time", i.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, i, args: _*) + optExprWithFallbackReason(optExpr, i, args: _*) case _ => None } @@ -187,7 +187,7 @@ trait CometExprShim extends CommonStringExprs { i.dataType, false, childExprs: _*) - optExprWithInfo(optExpr, expr, args: _*) + optExprWithFallbackReason(optExpr, expr, args: _*) case _ => None } @@ -211,7 +211,7 @@ trait CometExprShim extends CommonStringExprs { ms.dataType, failOnError = false, childExpr) - optExprWithInfo(mapSortExpr, ms, ms.child) + optExprWithFallbackReason(mapSortExpr, ms, ms.child) } case _ => None diff --git a/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala index e54f0962d0..1e31360eb0 100644 --- a/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.2/org/apache/comet/shims/CometExprShim.scala @@ -34,7 +34,7 @@ import org.apache.comet.CometSparkSessionExtensions.withFallbackReason import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible, SupportLevel} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} +import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithFallbackReason, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, supportedScalarSortElementType} /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -75,7 +75,7 @@ trait CometExprShim extends CommonStringExprs { returnType, false, arrayExprProto) - optExprWithInfo(scalarExpr, knc, arrayChild) + optExprWithFallbackReason(scalarExpr, knc, arrayChild) case _ => exprToProtoInternal(knc.child, inputs, binding) } case _ => exprToProtoInternal(knc.child, inputs, binding) @@ -102,7 +102,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = s.arguments.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("make_time", s.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, expr, s.arguments: _*) + optExprWithFallbackReason(optExpr, expr, s.arguments: _*) case expr @ ToPrettyString(child, timeZoneId) => val castSupported = CometCast.isSupported( @@ -142,7 +142,7 @@ trait CometExprShim extends CommonStringExprs { case wb: WidthBucket => val childExprs = wb.children.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) - optExprWithInfo(optExpr, wb, wb.children: _*) + optExprWithFallbackReason(optExpr, wb, wb.children: _*) // In Spark 4.x, RuntimeReplaceable expressions (StructsToJson, ParseUrl) become // Invoke(Literal(Evaluator), "evaluate", ...). Reconstruct the original expression @@ -172,7 +172,7 @@ trait CometExprShim extends CommonStringExprs { val childExprs = args.map(exprToProtoInternal(_, inputs, binding)) val optExpr = scalarFunctionExprToProtoWithReturnType("to_time", i.dataType, true, childExprs: _*) - optExprWithInfo(optExpr, i, args: _*) + optExprWithFallbackReason(optExpr, i, args: _*) case _ => None } @@ -187,7 +187,7 @@ trait CometExprShim extends CommonStringExprs { i.dataType, false, childExprs: _*) - optExprWithInfo(optExpr, expr, args: _*) + optExprWithFallbackReason(optExpr, expr, args: _*) case _ => None } @@ -211,7 +211,7 @@ trait CometExprShim extends CommonStringExprs { ms.dataType, failOnError = false, childExpr) - optExprWithInfo(mapSortExpr, ms, ms.child) + optExprWithFallbackReason(mapSortExpr, ms, ms.child) } case _ => None