fix: [Spark 4.1.1] preserve stored allowDecimalPrecisionLoss in DecimalPrecision rule (#4179)

andygrove · web-flow · commit 06d565afbcb1 · 2026-05-04T19:31:42.000-06:00
diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml
@@ -370,6 +370,7 @@ jobs:
               org.apache.spark.sql.comet.CometTaskMetricsSuite
               org.apache.spark.sql.comet.CometDppFallbackRepro3949Suite
               org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
+              org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
               org.apache.comet.objectstore.NativeConfigSuite
           - name: "expressions"
             value: |
diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml
@@ -209,6 +209,7 @@ jobs:
               org.apache.spark.sql.comet.CometTaskMetricsSuite
               org.apache.spark.sql.comet.CometDppFallbackRepro3949Suite
               org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
+              org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
               org.apache.comet.objectstore.NativeConfigSuite
           - name: "expressions"
             value: |
diff --git a/dev/diffs/4.1.1.diff b/dev/diffs/4.1.1.diff
@@ -721,7 +721,7 @@ index 6df8d66ee7f..35e270c7241 100644
      assert(exchanges.size == 2)
    }
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
-index e1a2fd33c7c..9a93daa8f5a 100644
+index e1a2fd33c7c..632f4b695df 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
 @@ -22,6 +22,7 @@ import org.scalatest.GivenWhenThen
@@ -781,43 +781,15 @@ index e1a2fd33c7c..9a93daa8f5a 100644
  
        assert(countSubqueryBroadcasts == 1)
        assert(countReusedSubqueryBroadcasts == 1)
-@@ -1215,7 +1231,8 @@ abstract class DynamicPartitionPruningSuiteBase
-   }
- 
-   test("SPARK-32509: Unused Dynamic Pruning filter shouldn't affect " +
--    "canonicalization and exchange reuse") {
-+    "canonicalization and exchange reuse",
-+    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/4045")) {
-     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") {
-       withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
-           SQLConf.V2_BUCKETING_ENABLED.key -> "false") {
-@@ -1331,6 +1348,7 @@ abstract class DynamicPartitionPruningSuiteBase
-   }
- 
-   test("Subquery reuse across the whole plan",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3313"),
-     DisableAdaptiveExecution("DPP in AQE must reuse broadcast")) {
-     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true",
-       SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false",
-@@ -1425,7 +1443,8 @@ abstract class DynamicPartitionPruningSuiteBase
-     }
-   }
- 
--  test("SPARK-34637: DPP side broadcast query stage is created firstly") {
-+  test("SPARK-34637: DPP side broadcast query stage is created firstly",
-+    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/4045")) {
-     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") {
-       val df = sql(
-         """ WITH v as (
-@@ -1579,6 +1598,7 @@ abstract class DynamicPartitionPruningSuiteBase
+@@ -1579,6 +1595,7 @@ abstract class DynamicPartitionPruningSuiteBase
  
          val subqueryBroadcastExecs = collectWithSubqueries(df.queryExecution.executedPlan) {
            case s: SubqueryBroadcastExec => s
 +          case s: CometSubqueryBroadcastExec => s
          }
          assert(subqueryBroadcastExecs.size === 1)
          subqueryBroadcastExecs.foreach { subqueryBroadcastExec =>
-@@ -1731,6 +1751,10 @@ abstract class DynamicPartitionPruningV1Suite extends DynamicPartitionPruningDat
+@@ -1731,6 +1748,10 @@ abstract class DynamicPartitionPruningV1Suite extends DynamicPartitionPruningDat
                case s: BatchScanExec =>
                  // we use f1 col for v2 tables due to schema pruning
                  s.output.exists(_.exists(_.argString(maxFields = 100).contains("f1")))
@@ -854,41 +826,18 @@ index b27122a8de2..a4c5aac8212 100644
  
    test("SPARK-35884: Explain Formatted") {
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
-index 95e86fe4311..0f7ed3271d4 100644
+index 95e86fe4311..fb2b6363af6 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
-@@ -33,6 +33,8 @@ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GreaterTha
+@@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GreaterTha
  import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt}
  import org.apache.spark.sql.catalyst.plans.logical.Filter
  import org.apache.spark.sql.catalyst.types.DataTypeUtils
-+import org.apache.spark.sql.catalyst.util.quietly
 +import org.apache.spark.sql.comet.{CometBatchScanExec, CometNativeScanExec, CometScanExec, CometSortMergeJoinExec}
  import org.apache.spark.sql.execution.{FileSourceScanLike, SimpleMode}
  import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
  import org.apache.spark.sql.execution.datasources.FilePartition
-@@ -204,7 +206,11 @@ class FileBasedDataSourceSuite extends QueryTest
-   }
- 
-   allFileBasedDataSources.foreach { format =>
--    testQuietly(s"Enabling/disabling ignoreMissingFiles using $format") {
-+    val ignoreMissingTags: Seq[org.scalatest.Tag] = if (format == "parquet") {
-+      Seq(IgnoreCometNativeDataFusion(
-+        "https://github.com/apache/datafusion-comet/issues/3314"))
-+    } else Seq.empty
-+    test(s"Enabling/disabling ignoreMissingFiles using $format", ignoreMissingTags: _*) { quietly {
-       def testIgnoreMissingFiles(options: Map[String, String]): Unit = {
-         withTempDir { dir =>
-           val basePath = dir.getCanonicalPath
-@@ -264,7 +270,7 @@ class FileBasedDataSourceSuite extends QueryTest
-           }
-         }
-       }
--    }
-+    }}
-   }
- 
-   Seq("json", "orc").foreach { format =>
-@@ -655,18 +661,25 @@ class FileBasedDataSourceSuite extends QueryTest
+@@ -655,18 +656,25 @@ class FileBasedDataSourceSuite extends QueryTest
              checkAnswer(sql(s"select A from $tableName"), data.select("A"))
  
              // RuntimeException is triggered at executor side, which is then wrapped as
@@ -921,31 +870,31 @@ index 95e86fe4311..0f7ed3271d4 100644
                condition = "_LEGACY_ERROR_TEMP_2093",
                parameters = Map("requiredFieldName" -> "b", "matchedOrcFields" -> "[b, B]")
              )
-@@ -954,6 +967,7 @@ class FileBasedDataSourceSuite extends QueryTest
+@@ -954,6 +962,7 @@ class FileBasedDataSourceSuite extends QueryTest
              assert(bJoinExec.isEmpty)
              val smJoinExec = collect(joinedDF.queryExecution.executedPlan) {
                case smJoin: SortMergeJoinExec => smJoin
 +              case smJoin: CometSortMergeJoinExec => smJoin
              }
              assert(smJoinExec.nonEmpty)
            }
-@@ -1014,6 +1028,7 @@ class FileBasedDataSourceSuite extends QueryTest
+@@ -1014,6 +1023,7 @@ class FileBasedDataSourceSuite extends QueryTest
  
            val fileScan = df.queryExecution.executedPlan collectFirst {
              case BatchScanExec(_, f: FileScan, _, _, _, _) => f
 +            case CometBatchScanExec(BatchScanExec(_, f: FileScan, _, _, _, _), _, _) => f
            }
            assert(fileScan.nonEmpty)
            assert(fileScan.get.partitionFilters.nonEmpty)
-@@ -1055,6 +1070,7 @@ class FileBasedDataSourceSuite extends QueryTest
+@@ -1055,6 +1065,7 @@ class FileBasedDataSourceSuite extends QueryTest
  
            val fileScan = df.queryExecution.executedPlan collectFirst {
              case BatchScanExec(_, f: FileScan, _, _, _, _) => f
 +            case CometBatchScanExec(BatchScanExec(_, f: FileScan, _, _, _, _), _, _) => f
            }
            assert(fileScan.nonEmpty)
            assert(fileScan.get.partitionFilters.isEmpty)
-@@ -1239,6 +1255,9 @@ class FileBasedDataSourceSuite extends QueryTest
+@@ -1239,6 +1250,9 @@ class FileBasedDataSourceSuite extends QueryTest
            val filters = df.queryExecution.executedPlan.collect {
              case f: FileSourceScanLike => f.dataFilters
              case b: BatchScanExec => b.scan.asInstanceOf[FileScan].dataFilters
@@ -2020,30 +1969,6 @@ index 47679ed7865..9ffbaecb98e 100644
      }.length == hashAggCount)
      assert(collectWithSubqueries(plan) { case s: SortAggregateExec => s }.length == sortAggCount)
    }
-diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
-index 050a004a935..96d982f2829 100644
---- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
-+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
-@@ -1054,7 +1054,8 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
-     }
-   }
- 
--  test("alter temporary view should follow current storeAnalyzedPlanForView config") {
-+  test("alter temporary view should follow current storeAnalyzedPlanForView config",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3314")) {
-     withTable("t") {
-       Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t")
-       withView("v1") {
-@@ -1334,7 +1335,8 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
-     }
-   }
- 
--  test("SPARK-53968 reading the view after allowPrecisionLoss is changed") {
-+  test("SPARK-53968 reading the view after allowPrecisionLoss is changed",
-+    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/4124")) {
-     import org.apache.spark.sql.internal.SQLConf
-     val partsTableName = "parts_tbl"
-     val ordersTableName = "orders_tbl"
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala
 index aed11badb71..1a365b5aacf 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala
@@ -3101,7 +3026,7 @@ index 3072657a095..b2293ccab17 100644
        checkAnswer(
          // "fruit" column in this file is encoded using DELTA_LENGTH_BYTE_ARRAY.
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
-index c530dc0d3df..abf36a7ab09 100644
+index c530dc0d3df..418d5ea4b4d 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 @@ -27,6 +27,7 @@ import org.apache.parquet.hadoop.ParquetOutputFormat
@@ -3122,17 +3047,7 @@ index c530dc0d3df..abf36a7ab09 100644
      val providedSchema = StructType(Seq(StructField("time", TimestampNTZType, false)))
  
      Seq("INT96", "TIMESTAMP_MICROS", "TIMESTAMP_MILLIS").foreach { tsType =>
-@@ -318,7 +320,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
-     }
-   }
- 
--  test("Enabling/disabling ignoreCorruptFiles") {
-+  test("Enabling/disabling ignoreCorruptFiles",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3314")) {
-     def testIgnoreCorruptFiles(options: Map[String, String]): Unit = {
-       withTempDir { dir =>
-         val basePath = dir.getCanonicalPath
-@@ -996,7 +999,11 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -996,7 +998,11 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
          Seq(Some("A"), Some("A"), None).toDF().repartition(1)
            .write.parquet(path.getAbsolutePath)
          val df = spark.read.parquet(path.getAbsolutePath)
@@ -3145,7 +3060,7 @@ index c530dc0d3df..abf36a7ab09 100644
        }
      }
    }
-@@ -1042,7 +1049,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1042,7 +1048,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      testMigration(fromTsType = "TIMESTAMP_MICROS", toTsType = "INT96")
    }
  
@@ -3155,7 +3070,7 @@ index c530dc0d3df..abf36a7ab09 100644
      def readParquet(schema: String, path: File): DataFrame = {
        spark.read.schema(schema).parquet(path.toString)
      }
-@@ -1060,7 +1068,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1060,7 +1067,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
          checkAnswer(readParquet(schema2, path), df)
        }
  
@@ -3165,7 +3080,7 @@ index c530dc0d3df..abf36a7ab09 100644
          val schema1 = "a DECIMAL(3, 2), b DECIMAL(18, 3), c DECIMAL(37, 3)"
          checkAnswer(readParquet(schema1, path), df)
          val schema2 = "a DECIMAL(3, 0), b DECIMAL(18, 1), c DECIMAL(37, 1)"
-@@ -1084,7 +1093,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1084,7 +1092,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
        val df = sql(s"SELECT 1 a, 123456 b, ${Int.MaxValue.toLong * 10} c, CAST('1.2' AS BINARY) d")
        df.write.parquet(path.toString)
  
@@ -3175,7 +3090,7 @@ index c530dc0d3df..abf36a7ab09 100644
          checkAnswer(readParquet("a DECIMAL(3, 2)", path), sql("SELECT 1.00"))
          checkAnswer(readParquet("a DECIMAL(11, 2)", path), sql("SELECT 1.00"))
          checkAnswer(readParquet("b DECIMAL(3, 2)", path), Row(null))
-@@ -1131,7 +1141,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1131,7 +1140,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      }
    }
  
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -585,9 +585,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
       inputs: Seq[Attribute],
       binding: Boolean = true): Option[Expr] = {
 
-    val conf = SQLConf.get
-    val newExpr =
-      DecimalPrecision.promote(conf.decimalOperationsAllowPrecisionLoss, expr, !conf.ansiEnabled)
+    val newExpr = DecimalPrecision.promote(expr, !SQLConf.get.ansiEnabled)
     exprToProtoInternal(newExpr, inputs, binding)
   }
 
diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/DecimalPrecision.scala b/spark/src/main/scala/org/apache/spark/sql/comet/DecimalPrecision.scala
diff --git a/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala b/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala

Original file line number	Diff line number	Diff line change
`@@ -585,9 +585,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {`
`585`	`585`	`inputs: Seq[Attribute],`
`586`	`586`	`binding: Boolean = true): Option[Expr] = {`
`587`	`587`
`588`		`- val conf = SQLConf.get`
`589`		`- val newExpr =`
`590`		`- DecimalPrecision.promote(conf.decimalOperationsAllowPrecisionLoss, expr, !conf.ansiEnabled)`
	`588`	`+ val newExpr = DecimalPrecision.promote(expr, !SQLConf.get.ansiEnabled)`
`591`	`589`	`exprToProtoInternal(newExpr, inputs, binding)`
`592`	`590`	`}`
`593`	`591`