diff --git a/enable-if-version-annotation-macros/pom.xml b/enable-if-version-annotation-macros/pom.xml new file mode 100644 index 0000000000..db46f8b484 --- /dev/null +++ b/enable-if-version-annotation-macros/pom.xml @@ -0,0 +1,81 @@ + + + + + + + 4.0.0 + + org.apache.datafusion + comet-parent-spark${spark.version.short}_${scala.binary.version} + 0.17.0-SNAPSHOT + ../pom.xml + + + comet-enable-if-version-annotation-macros-spark${spark.version.short}_${scala.binary.version} + comet-enable-if-version-annotation-macros + + + + + + org.scala-lang + scala-library + + + + org.scala-lang + scala-reflect + provided + + + + org.semver4j + semver4j + provided + + + + + + + + net.alchim31.maven + scala-maven-plugin + + + + + diff --git a/enable-if-version-annotation-macros/src/main/scala/org/apache/comet/enableIfVer.scala b/enable-if-version-annotation-macros/src/main/scala/org/apache/comet/enableIfVer.scala new file mode 100644 index 0000000000..32ff68f6eb --- /dev/null +++ b/enable-if-version-annotation-macros/src/main/scala/org/apache/comet/enableIfVer.scala @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet + +import scala.annotation.{compileTimeOnly, nowarn, StaticAnnotation} +import scala.language.experimental._ +import scala.reflect.macros.whitebox + +import org.semver4j.Semver + +/** + * Shared machinery behind the version annotations [[enableIfVer]], [[implementIfVer]] and + * [[enableOverrideIfVer]]. + * + * Every annotation performs the same compile-time tree rewrite (drop / empty-body / strip + * `override`). the ONLY thing that differs is a single `Boolean` - does the build's targeted + * version satisfy the given range? So the match decision is computed per annotation and handed to + * the shared expansions here. + * + * The build feeds the targeted version of each dimension to the macro via + * `-Xmacro-settings:enableIfVer.=`, read at expansion time from `c.settings` + * (see [[versionOf]]). Adding a dimension is therefore just one more build entry - no generated + * sources. Comet currently configures a single dimension, `spark`. + * + * Ranges are matched by semver4j: full + * `major.minor.patch` versions with `>` `>=` `<` `<=` `=` `!=`, space = AND, `||` = OR, `A - B` + * hyphen ranges and more. + */ +object EnableIfVerSupport { + + /** + * Does the configured `version` satisfy the semver `range`? + * + * Matching enables `includePrerelease` so a pre-release Spark (e.g. `4.2.0-preview4`) is + * matched by ordinary ranges like `>=4.0.0` / `4` / `>=3.5.0`, and can also be targeted + * explicitly (e.g. `3.5 || 4.2.0-preview4`). Without it semver4j (like node-semver) refuses to + * match a pre-release against a range whose comparators carry no pre-release of the same + * major.minor.patch tuple. + * + * Matching a pre-release at a *lower bound* needs care, because a pre-release sorts BELOW its + * release (`4.0.0-preview4 < 4.0.0`). So to gate "Spark 4.x including the 4.0.0 previews", + * anchor the bound at `-0` (the lowest possible pre-release): write `spark = ">=4.0.0-0"` (or + * the x-range `spark = "4"`), not `">=4.0.0"`. For example: + * - `>=4.0.0` does NOT match `4.0.0-preview4` (but matches `4.2.0-preview4`, `4.0.0`, ...) + * - `>=4.0.0-0` matches `4.0.0-preview4` and everything `>=4.0.0` + * - `4` (x-range) already covers `4.0.0-preview*` (its lower bound is treated inclusively) + */ + def satisfies(range: String, version: String): Boolean = { + // positional arg: semver4j is a Java API, so the `includePrerelease` parameter cannot be + // passed by name + val includePrerelease = true + new Semver(version).satisfies(range.trim, includePrerelease) + } + + /** Prefix of the `-Xmacro-settings` keys this macro understands. */ + private val SettingPrefix = "enableIfVer." + + /** Parse `enableIfVer.=` entries out of `-Xmacro-settings`. */ + private def configuredVersions(c: whitebox.Context): Map[String, String] = + c.settings.collect { + case s if s.startsWith(SettingPrefix) => + s.stripPrefix(SettingPrefix).split("=", 2) match { + case Array(dim, ver) => dim -> parseVersionFromSetting(dim, ver) + case _ => + c.abort( + c.enclosingPosition, + s"@enableIfVer: malformed macro setting '$s' " + + s"(expected $SettingPrefix=)") + } + }.toMap + + private def parseVersionFromSetting(name: String, version: String): String = { + try { + // Not using Semver.parse as it will return null instead of giving us meaningful + // exceptions on invalid input + new Semver(version) + + version + } catch { + case e: Throwable => + sys.error( + s"malformed version passed in macro setting for '$name', expected a valid " + + s"SemVer got '$version' (error: ${e.toString})") + } + } + + /** + * Build-time version configured for `dimension`, read from `-Xmacro-settings`. The single + * extensibility seam: to add a dimension, pass + * `-Xmacro-settings:${SettingPrefix}=` from the build. + * + * Aborts compilation when the dimension was not configured at all. + */ + private def versionOf(c: whitebox.Context, dimension: String): String = { + val versions = configuredVersions(c) + versions.getOrElse( + dimension, + // we do not treat a missing dimension as a match since we want to avoid silent failures + c.abort( + c.enclosingPosition, + s"@enableIfVer: no version configured for dimension '$dimension'" + + s" (configured: ${versions.keys.toList.sorted.mkString(", ")}). " + + "Pass it via the compiler flag " + + s""""-Xmacro-settings:$SettingPrefix$dimension=".""")) + } + + object Macros { + + /** + * Extract the named `dimension = range` argument of a version annotation. A named argument is + * `name = value`, whose immediate children are `[Ident(name), value]` - matched structurally + * so this works on both Scala 2.12 (`AssignOrNamedArg`) and 2.13 (`NamedArg`). A positional + * arg (e.g. a bare literal) has no such children and is rejected. + */ + private def namedRanges(c: whitebox.Context): List[(String, String)] = { + import c.universe._ + val args = c.macroApplication match { + case Apply(Select(Apply(_, as), _), _) => as + } + + args.map { arg => + arg.children match { + case List(Ident(name), value) => + (name.decodedName.toString, c.eval(c.Expr[String](value))) + case _ => + c.abort( + c.enclosingPosition, + "@enableIfVer (and all the related version annotations) require named " + + s"""arguments, e.g. spark = ">=3.5.0"". got: ${showRaw(arg)}""") + } + } + } + + /** Require exactly one named dimension arg and return whether the build matches its range. */ + def singleKeep(c: whitebox.Context, specificMacroPrefix: String): Boolean = { + val ranges = namedRanges(c) + if (ranges.size != 1) { + val ifCase = if (specificMacroPrefix.isEmpty) "enableIf" else "If" + c.abort( + c.enclosingPosition, + s"@${specificMacroPrefix}${ifCase}Ver accepts exactly one dimension " + + s"(got ${ranges.size}).") + } + val (dim, range) = ranges.head + satisfies(range, versionOf(c, dim)) + } + + // ----- generic tree-rewrite expansions (take a precomputed keep) --------------------------- + + /** Keep the annotated member as-is when `keep`, otherwise drop it entirely. */ + def enable(c: whitebox.Context)(annottees: Seq[c.Expr[Any]])(keep: Boolean): c.Expr[Any] = { + import c.universe._ + if (keep) c.Expr[Any](q"..$annottees") + else c.Expr(EmptyTree) + } + + /** + * Keep the annotated member as-is when `keep`, otherwise remove the class body and the + * inheritance + */ + def implementIf(c: whitebox.Context)(annottees: Seq[c.Expr[Any]])( + keep: Boolean): c.Expr[Any] = { + import c.universe._ + + if (keep) return c.Expr[Any](q"..$annottees") + val head = annottees.head.tree match { + case ClassDef(mods, name, tparams, Template(_, self, _)) => + ClassDef(mods, name, tparams, Template(List(), self, List(EmptyTree))) + case ModuleDef(mods, name, Template(_, self, _)) => + ModuleDef(mods, name, Template(List(), self, List(EmptyTree))) + } + c.Expr(q"$head; ..${annottees.tail}") + } + + /** Keep the def/val as-is when `keep`. otherwise strip its `override` modifier. */ + def enableOverride(c: whitebox.Context)(annottees: Seq[c.Expr[Any]])( + keep: Boolean): c.Expr[Any] = { + import c.universe._ + import scala.reflect.internal.Flags + if (keep) return c.Expr[Any](q"..$annottees") + + val head = annottees.head.tree match { + case DefDef(mods, name, tparams, vparams, tpt, rhs) => + val newMods = Modifiers( + (mods.flags.asInstanceOf[Long] & ~Flags.OVERRIDE).asInstanceOf[FlagSet], + mods.privateWithin, + mods.annotations) + DefDef(newMods, name, tparams, vparams, tpt, rhs) + + case ValDef(mods, name, tpt, rhs) => + val newMods = Modifiers( + (mods.flags.asInstanceOf[Long] & ~Flags.OVERRIDE).asInstanceOf[FlagSet], + mods.privateWithin, + mods.annotations) + ValDef(newMods, name, tpt, rhs) + } + c.Expr(q"$head; ..${annottees.tail}") + } + } +} + +object enableIfVer { + object Macros { + def verEnable(c: whitebox.Context)(annottees: c.Expr[Any]*): c.Expr[Any] = + EnableIfVerSupport.Macros.enable(c)(annottees)(EnableIfVerSupport.Macros.singleKeep(c, "")) + def verImplementIf(c: whitebox.Context)(annottees: c.Expr[Any]*): c.Expr[Any] = + EnableIfVerSupport.Macros.implementIf(c)(annottees)( + EnableIfVerSupport.Macros.singleKeep(c, "implement")) + def verEnableOverride(c: whitebox.Context)(annottees: c.Expr[Any]*): c.Expr[Any] = + EnableIfVerSupport.Macros.enableOverride(c)(annottees)( + EnableIfVerSupport.Macros.singleKeep(c, "enableOverride")) + } +} + +/** + * Keep the annotated member only when the build matches a single dimension's range. otherwise + * drop it entirely. Exactly one dimension must be given as a named semver range. Known dimensions + * are whatever the build configures via `-Xmacro-settings` (currently `spark`). + * + * Example: + * {{{ + * @enableIfVer(spark = ">=3.5.0") // keep only on Spark 3.5+ + * def onlyOn35Plus(): Unit = ... + * + * @enableIfVer(spark = ">=3.4.0 <4.0.0") // keep on the 3.4 / 3.5 line, drop on 4.0 + * override protected def withNewChildInternal(c: SparkPlan): SparkPlan = copy(child = c) + * }}} + * + * Pre-release versions (e.g. `4.2.0-preview4`): ordinary ranges already match them (`>=4.0.0` and + * `4` both match `4.2.0-preview4`), and you can target one explicitly (`spark = "3.5 || + * 4.2.0-preview4"`). The one gotcha is a lower bound on the pre-release's OWN release, since a + * pre-release sorts below its release (`4.0.0-preview4 < 4.0.0`): + * {{{ + * @enableIfVer(spark = ">=4.0.0") // matches 4.2.0-preview4 & 4.0.0, but NOT 4.0.0-preview4 + * @enableIfVer(spark = ">=4.0.0-0") // also matches 4.0.0-preview4 ("-0" = lowest pre-release) + * @enableIfVer(spark = "4") // x-range: also matches 4.0.0-preview4 + * }}} + * So to gate "Spark 4.x including the 4.0.0 previews", use `">=4.0.0-0"` or `"4"`, not + * `">=4.0.0"`. + */ +@nowarn("cat=unused") // params are used by the macro +@compileTimeOnly("enable macro paradise to expand macro annotations") +final class enableIfVer(spark: String = "") extends StaticAnnotation { + def macroTransform(annottees: Any*): Any = macro enableIfVer.Macros.verEnable +} + +/** + * Like [[enableIfVer]], but on a non-matching version the class/object is KEPT (so the type still + * exists) with its body emptied. The inheritance is removed. Use this when the type must stay + * referenceable on every version but its body touches symbols that only exist on some versions. + * + * For example, `WindowGroupLimitExec` was added in Spark 3.5, so this does not compile on 3.4 - + * the top-level import and the parameter type are resolved on every version: + * {{{ + * import org.apache.spark.sql.execution.window.WindowGroupLimitExec + * + * class RunWithWGL { + * def run(a: WindowGroupLimitExec): Unit = ... + * } + * }}} + * + * Gate the class so its body (and the 3.5-only import, scoped inside it) only exists on 3.5+: + * {{{ + * @implementIfVer(spark = ">=3.5") + * class RunWithWGL { + * import org.apache.spark.sql.execution.window.WindowGroupLimitExec + * def run(a: WindowGroupLimitExec): Unit = ... + * } + * }}} + * + * this will also remove inheritance so `isSomething` won't be required to implement when only + * removing the class body + * {{{ + * abstract class Base { + * protected val isSomething: Boolean + * def run(): Unit = if (isSomething) println("something") + * } + * + * @implementIfVer(spark = ">=4") + * class OnlySpark4OrAbove extends Base { + * protected val isSomething: Boolean = false + * + * override def run(): Unit = println("only for spark 4+") // dropped on < 4.0 + * } + * + * // For spark below 4 the expanded code will be + * class OnlySpark4OrAbove { + * } + * }}} + */ +@nowarn("cat=unused") // params are used by the macro +@compileTimeOnly("enable macro paradise to expand macro annotations") +final class implementIfVer(spark: String = "") extends StaticAnnotation { + def macroTransform(annottees: Any*): Any = macro enableIfVer.Macros.verImplementIf +} + +/** + * Like [[enableIfVer]], but on a non-matching version the `override` modifier is stripped from + * the annotated def/val instead of removing it. Use this for a member that overrides a base + * member only on some versions (because the base member only exists there). + * + * Example: + * {{{ + * // `withNewChildInternal` only exists in the base on Spark 3.2+. On < 3.2 the `override` is + * // stripped and it becomes a plain (non-overriding) def, so it still compiles. + * @enableOverrideIfVer(spark = ">=3.2") + * override def withNewChildInternal(c: SparkPlan): SparkPlan = copy(child = c) + * }}} + */ +@nowarn("cat=unused") // params are used by the macro +@compileTimeOnly("enable macro paradise to expand macro annotations") +final class enableOverrideIfVer(spark: String = "") extends StaticAnnotation { + def macroTransform(annottees: Any*): Any = macro enableIfVer.Macros.verEnableOverride +} diff --git a/pom.xml b/pom.xml index 95444845e1..9d80df99b4 100644 --- a/pom.xml +++ b/pom.xml @@ -35,6 +35,7 @@ under the License. Comet Project Parent POM + enable-if-version-annotation-macros common spark spark-integration @@ -85,6 +86,12 @@ under the License. 33.2.1-jre 1.21.4 2.31.51 + 5.8.0 + + -Ymacro-annotations ${project.basedir}/../native/target/debug darwin x86_64 @@ -267,6 +274,14 @@ under the License. scala-library ${scala.version} + + + org.semver4j + semver4j + ${semver4j.version} + org.scala-lang.modules scala-collection-compat_${scala.binary.version} @@ -662,7 +677,31 @@ under the License. 11 ${java.version} ${java.version} + + -feature + + + + + net.alchim31.maven + scala-maven-plugin + + + + + org.scalamacros + paradise_${scala.version} + 2.1.1 + + + + + + + @@ -680,7 +719,30 @@ under the License. 11 ${java.version} ${java.version} + + -feature + + + + + net.alchim31.maven + scala-maven-plugin + + + + + org.scalamacros + paradise_${scala.version} + 2.1.1 + + + + + + + @@ -750,6 +812,9 @@ under the License. 2.12.18 2.12 4.8.8 + @@ -759,7 +824,26 @@ under the License. 2.13.16 2.13 4.13.6 + + -Ymacro-annotations + + + + + net.alchim31.maven + scala-maven-plugin + + + + + + + + @@ -811,6 +895,11 @@ under the License. -Ywarn-unused + + -Xmacro-settings:enableIfVer.spark=${spark.version} + ${scala.macros.annotations.arg} -source @@ -904,6 +993,12 @@ under the License. -feature -explaintypes -Xlint:adapted-args + + -Xmacro-settings:enableIfVer.spark=${spark.version} + + ${scala.macros.annotations.arg} -Xms1024m diff --git a/spark/pom.xml b/spark/pom.xml index 7dd3c6fe33..6bc294f21a 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -44,6 +44,22 @@ under the License. comet-common-spark${spark.version.short}_${scala.binary.version} ${project.version} + + + org.apache.datafusion + comet-enable-if-version-annotation-macros-spark${spark.version.short}_${scala.binary.version} + ${project.version} + provided + + + + org.semver4j + semver4j + provided + org.apache.spark spark-sql_${scala.binary.version} diff --git a/spark/src/main/scala/org/apache/comet/CometConf.scala b/spark/src/main/scala/org/apache/comet/CometConf.scala index aabd64b9b3..439990851d 100644 --- a/spark/src/main/scala/org/apache/comet/CometConf.scala +++ b/spark/src/main/scala/org/apache/comet/CometConf.scala @@ -29,8 +29,6 @@ import org.apache.spark.network.util.JavaUtils import org.apache.spark.sql.comet.util.Utils import org.apache.spark.sql.internal.SQLConf -import org.apache.comet.shims.ShimCometConf - /** * Configurations for a Comet application. Mostly inspired by [[SQLConf]] in Spark. * @@ -43,7 +41,7 @@ import org.apache.comet.shims.ShimCometConf * which retrieves the config value from the thread-local [[SQLConf]] object. Alternatively, you * can also explicitly pass a [[SQLConf]] object to the `get` method. */ -object CometConf extends ShimCometConf { +object CometConf { val COMPAT_GUIDE: String = "For more information, refer to the Comet Compatibility " + "Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html)" @@ -852,6 +850,26 @@ object CometConf extends ShimCometConf { val COMET_OPERATOR_DATA_WRITING_COMMAND_ALLOW_INCOMPAT: ConfigEntry[Boolean] = createOperatorIncompatConfig("DataWritingCommandExec") + /** + * Whether Comet's Parquet scan paths allow widening type promotions (e.g. INT32 -> INT64, FLOAT + * -> DOUBLE, INT32 -> DOUBLE). Reads from the deprecated `spark.comet.schemaEvolution.enabled` + * SQL conf were removed in favor of this per-version constant; see #4298. + */ + val COMET_SCHEMA_EVOLUTION_ENABLED: Boolean = isCometSchemaEvolutionEnabled + + /** + * Spark 3.x's vectorized reader rejects these on read, so Comet matches by defaulting to false + * on 3.x + */ + @enableIfVer(spark = "3") + private def isCometSchemaEvolutionEnabled: Boolean = false + + /** + * Spark 4.x's reader accepts them, so it defaults to true + */ + @enableIfVer(spark = "4") + private def isCometSchemaEvolutionEnabled: Boolean = true + /** Create a config to enable a specific operator */ private def createExecEnabledConfig( exec: String, diff --git a/spark/src/main/spark-3.5/org/apache/comet/shims/ShimFileFormat.scala b/spark/src/main/scala/org/apache/comet/shims/ShimFileFormat.scala similarity index 56% rename from spark/src/main/spark-3.5/org/apache/comet/shims/ShimFileFormat.scala rename to spark/src/main/scala/org/apache/comet/shims/ShimFileFormat.scala index 1702db135a..fd062d2d3c 100644 --- a/spark/src/main/spark-3.5/org/apache/comet/shims/ShimFileFormat.scala +++ b/spark/src/main/scala/org/apache/comet/shims/ShimFileFormat.scala @@ -19,15 +19,36 @@ package org.apache.comet.shims -import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat -import org.apache.spark.sql.execution.datasources.parquet.ParquetRowIndexUtil import org.apache.spark.sql.types.StructType +import org.apache.comet.enableIfVer + object ShimFileFormat { // A name for a temporary column that holds row indexes computed by the file format reader // until they can be placed in the _metadata struct. - val ROW_INDEX_TEMPORARY_COLUMN_NAME = ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME + val ROW_INDEX_TEMPORARY_COLUMN_NAME: String = getRowIndexTemporaryColumnName + + @enableIfVer(spark = "<3.5.0") + private def getRowIndexTemporaryColumnName: String = { + import org.apache.spark.sql.execution.datasources.FileFormat + FileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME + } + + @enableIfVer(spark = ">=3.5.0") + private def getRowIndexTemporaryColumnName: String = { + import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat + ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME + } + + @enableIfVer(spark = "<3.5.0") + def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = { + import org.apache.spark.sql.execution.datasources.RowIndexUtil + RowIndexUtil.findRowIndexColumnIndexInSchema(sparkSchema) + } - def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = + @enableIfVer(spark = ">=3.5.0") + def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = { + import org.apache.spark.sql.execution.datasources.parquet.ParquetRowIndexUtil ParquetRowIndexUtil.findRowIndexColumnIndexInSchema(sparkSchema) + } } diff --git a/spark/src/main/spark-3.4/org/apache/comet/shims/ShimFileFormat.scala b/spark/src/main/spark-3.4/org/apache/comet/shims/ShimFileFormat.scala deleted file mode 100644 index 7b4911e81f..0000000000 --- a/spark/src/main/spark-3.4/org/apache/comet/shims/ShimFileFormat.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.comet.shims - -import org.apache.spark.sql.execution.datasources.{FileFormat, RowIndexUtil} -import org.apache.spark.sql.types.StructType - -object ShimFileFormat { - - // A name for a temporary column that holds row indexes computed by the file format reader - // until they can be placed in the _metadata struct. - val ROW_INDEX_TEMPORARY_COLUMN_NAME: String = FileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME - - def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = - RowIndexUtil.findRowIndexColumnIndexInSchema(sparkSchema) -} diff --git a/spark/src/main/spark-3.x/org/apache/comet/shims/ShimCometConf.scala b/spark/src/main/spark-3.x/org/apache/comet/shims/ShimCometConf.scala deleted file mode 100644 index a113893c84..0000000000 --- a/spark/src/main/spark-3.x/org/apache/comet/shims/ShimCometConf.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.comet.shims - -trait ShimCometConf { - - /** - * Whether Comet's Parquet scan paths allow widening type promotions (e.g. INT32 → INT64, FLOAT - * → DOUBLE). Spark 3.x's vectorized reader rejects these on read, so Comet matches by - * defaulting to false on 3.x. Reads from the deprecated `spark.comet.schemaEvolution.enabled` - * SQL conf were removed in favor of this per-version constant; see #4298. - */ - val COMET_SCHEMA_EVOLUTION_ENABLED: Boolean = false -} diff --git a/spark/src/main/spark-4.x/org/apache/comet/shims/ShimCometConf.scala b/spark/src/main/spark-4.x/org/apache/comet/shims/ShimCometConf.scala deleted file mode 100644 index e89b37d604..0000000000 --- a/spark/src/main/spark-4.x/org/apache/comet/shims/ShimCometConf.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.comet.shims - -trait ShimCometConf { - - /** - * Whether Comet's Parquet scan paths allow widening type promotions (e.g. INT32 → INT64, FLOAT - * → DOUBLE, INT32 → DOUBLE). Spark 4.x's vectorized reader accepts these by default. Reads from - * the deprecated `spark.comet.schemaEvolution.enabled` SQL conf were removed in favor of this - * per-version constant; see #4298. - */ - val COMET_SCHEMA_EVOLUTION_ENABLED: Boolean = true -} diff --git a/spark/src/main/spark-4.x/org/apache/comet/shims/ShimFileFormat.scala b/spark/src/main/spark-4.x/org/apache/comet/shims/ShimFileFormat.scala deleted file mode 100644 index 1702db135a..0000000000 --- a/spark/src/main/spark-4.x/org/apache/comet/shims/ShimFileFormat.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.comet.shims - -import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat -import org.apache.spark.sql.execution.datasources.parquet.ParquetRowIndexUtil -import org.apache.spark.sql.types.StructType - -object ShimFileFormat { - // A name for a temporary column that holds row indexes computed by the file format reader - // until they can be placed in the _metadata struct. - val ROW_INDEX_TEMPORARY_COLUMN_NAME = ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME - - def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = - ParquetRowIndexUtil.findRowIndexColumnIndexInSchema(sparkSchema) -}