coderfender
diff --git a/‎.github/workflows/spark_sql_test.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/spark_sql_test.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/diffs/3.4.3.diff‎
Lines changed: 51 additions & 6 deletions b/‎dev/diffs/3.4.3.diff‎
Lines changed: 51 additions & 6 deletions
@@ -155,7 +155,7 @@ jobs:
         run: |
           cd apache-spark
           rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
-          NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=${{ matrix.config.scan-impl }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
+          NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ENABLE_COMET_WRITER=true ${{ matrix.config.scan-env }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
             build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
           if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
             find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
 
@@ -2092,7 +2092,7 @@ index 104b4e416cd..37ea65081e4 100644
          case _ =>
            throw new AnalysisException("Can not match ParquetTable in the query.")
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
-index 8670d95c65e..b624c3811dd 100644
+index 8670d95c65e..3fe49802309 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -1335,7 +1335,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
@@ -2105,6 +2105,41 @@ index 8670d95c65e..b624c3811dd 100644
      withAllParquetReaders {
        checkAnswer(
          // "fruit" column in this file is encoded using DELTA_LENGTH_BYTE_ARRAY.
+@@ -1541,7 +1542,9 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+     }
+   }
+ 
+-  test("Write Spark version into Parquet metadata") {
++  // TODO: Comet native writer should add the Spark / Comet version to the Parquet metadata
++  test("Write Spark version into Parquet metadata",
++    IgnoreComet("Comet native writer doesn't write Spark version into Parquet metadata yet")) {
+     withTempPath { dir =>
+       spark.range(1).repartition(1).write.parquet(dir.getAbsolutePath)
+       assert(getMetaData(dir)(SPARK_VERSION_METADATA_KEY) === SPARK_VERSION_SHORT)
+diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala
+index 8b386e8f689..28ced6209e0 100644
+--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala
++++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala
+@@ -25,7 +25,7 @@ import org.apache.hadoop.fs.{Path, PathFilter}
+ import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER
+ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
+ 
+-import org.apache.spark.sql.Row
++import org.apache.spark.sql.{IgnoreComet, Row}
+ import org.apache.spark.sql.catalyst.util.DateTimeUtils
+ import org.apache.spark.sql.internal.SQLConf
+ import org.apache.spark.sql.test.SharedSparkSession
+@@ -153,7 +153,9 @@ class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedS
+     }
+   }
+ 
+-  test("parquet timestamp conversion") {
++  // TODO: Support legacy timestamp conversion / cast in the Comet native writer
++  test("parquet timestamp conversion",
++    IgnoreComet("timestamp96 conversion failed with the native writer")) {
+     // Make a table with one parquet file written by impala, and one parquet file written by spark.
+     // We should only adjust the timestamps in the impala file, and only if the conf is set
+     val impalaFile = "test-data/impala_timestamp.parq"
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 index 29cb224c878..44837aa953b 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -2798,7 +2833,7 @@ index abe606ad9c1..2d930b64cca 100644
      val tblTargetName = "tbl_target"
      val tblSourceQualified = s"default.$tblSourceName"
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
-index dd55fcfe42c..a1d390c93d0 100644
+index dd55fcfe42c..e898fc33bab 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -27,6 +27,7 @@ import scala.concurrent.duration._
@@ -2856,7 +2891,7 @@ index dd55fcfe42c..a1d390c93d0 100644
      }
    }
 
-@@ -242,6 +265,29 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +265,34 @@ private[sql] trait SQLTestUtilsBase
      protected override def _sqlContext: SQLContext = self.spark.sqlContext
    }
 
@@ -2882,11 +2917,16 @@ index dd55fcfe42c..a1d390c93d0 100644
 +    val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
 +    v != null && v.toBoolean
 +  }
++
++  protected def isCometWriterEnabled: Boolean = {
++    val v = System.getenv("ENABLE_COMET_WRITER")
++    v != null && v.toBoolean
++  }
 +
    protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
      SparkSession.setActiveSession(spark)
      super.withSQLConf(pairs: _*)(f)
-@@ -434,6 +480,8 @@ private[sql] trait SQLTestUtilsBase
+@@ -434,6 +485,8 @@ private[sql] trait SQLTestUtilsBase
      val schema = df.schema
      val withoutFilters = df.queryExecution.executedPlan.transform {
        case FilterExec(_, child) => child
@@ -2896,10 +2936,10 @@ index dd55fcfe42c..a1d390c93d0 100644
 
      spark.internalCreateDataFrame(withoutFilters.execute(), schema)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-index ed2e309fa07..a5ea58146ad 100644
+index ed2e309fa07..9c5c393ad14 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-@@ -74,6 +74,31 @@ trait SharedSparkSessionBase
+@@ -74,6 +74,36 @@ trait SharedSparkSessionBase
        // this rule may potentially block testing of other optimization rules such as
        // ConstantPropagation etc.
        .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
@@ -2927,6 +2967,11 @@ index ed2e309fa07..a5ea58146ad 100644
 +        conf
 +          .set("spark.sql.ansi.enabled", "true")
 +      }
++
++      if (isCometWriterEnabled) {
++        conf.set("spark.comet.parquet.write.enabled", "true")
++        conf.set("spark.comet.operator.DataWritingCommandExec.allowIncompatible", "true")
++      }
 +    }
      conf.set(
        StaticSQLConf.WAREHOUSE_PATH,