apache
diff --git a/‎.github/actions/java-test/action.yaml‎
Lines changed: 0 additions & 6 deletions b/‎.github/actions/java-test/action.yaml‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎.github/workflows/pr_build_linux.yml‎
Lines changed: 2 additions & 9 deletions b/‎.github/workflows/pr_build_linux.yml‎
Lines changed: 2 additions & 9 deletions
diff --git a/‎.github/workflows/spark_sql_test.yml‎
Lines changed: 8 additions & 10 deletions b/‎.github/workflows/spark_sql_test.yml‎
Lines changed: 8 additions & 10 deletions
diff --git a/‎.github/workflows/spark_sql_test_native_iceberg_compat.yml‎
Lines changed: 0 additions & 72 deletions b/‎.github/workflows/spark_sql_test_native_iceberg_compat.yml‎
Lines changed: 0 additions & 72 deletions
diff --git a/‎common/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 10 additions & 9 deletions b/‎common/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 10 additions & 9 deletions
diff --git a/‎docs/source/contributor-guide/bug_triage.md‎
Lines changed: 3 additions & 5 deletions b/‎docs/source/contributor-guide/bug_triage.md‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎docs/source/user-guide/latest/datasources.md‎
Lines changed: 4 additions & 8 deletions b/‎docs/source/user-guide/latest/datasources.md‎
Lines changed: 4 additions & 8 deletions
diff --git a/‎spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala‎
Lines changed: 2 additions & 23 deletions b/‎spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala‎
Lines changed: 2 additions & 23 deletions
diff --git a/‎spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala‎
Lines changed: 1 addition & 1 deletion b/‎spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎spark/src/test/scala/org/apache/comet/CometCsvExpressionSuite.scala‎
Lines changed: 1 addition & 1 deletion b/‎spark/src/test/scala/org/apache/comet/CometCsvExpressionSuite.scala‎
Lines changed: 1 addition & 1 deletion
@@ -29,10 +29,6 @@ inputs:
     description: 'Maven options passed to the mvn command'
     required: false
     default: ''
-  scan_impl:
-    description: 'The default Parquet scan implementation'
-    required: false
-    default: 'auto'
   upload-test-reports:
     description: 'Whether to upload test results including coverage to GitHub'
     required: false
@@ -72,7 +68,6 @@ runs:
       shell: bash
       if: ${{ inputs.suites == '' }}
       env:
-        COMET_PARQUET_SCAN_IMPL: ${{ inputs.scan_impl }}
         SPARK_LOCAL_HOSTNAME: "localhost"
         SPARK_LOCAL_IP: "127.0.0.1"
       run: |
@@ -81,7 +76,6 @@ runs:
       shell: bash
       if: ${{ inputs.suites != '' }}
       env:
-        COMET_PARQUET_SCAN_IMPL: ${{ inputs.scan_impl }}
         SPARK_LOCAL_HOSTNAME: "localhost"
         SPARK_LOCAL_IP: "127.0.0.1"
       run: |
 
@@ -285,32 +285,26 @@ jobs:
           - name: "Spark 3.4, JDK 11, Scala 2.12"
             java_version: "11"
             maven_opts: "-Pspark-3.4 -Pscala-2.12"
-            scan_impl: "auto"
 
           - name: "Spark 3.5, JDK 17, Scala 2.13"
             java_version: "17"
             maven_opts: "-Pspark-3.5 -Pscala-2.13"
-            scan_impl: "native_iceberg_compat"
 
           - name: "Spark 4.0, JDK 17"
             java_version: "17"
             maven_opts: "-Pspark-4.0"
-            scan_impl: "auto"
 
           - name: "Spark 4.0, JDK 21"
             java_version: "21"
             maven_opts: "-Pspark-4.0"
-            scan_impl: "auto"
 
           - name: "Spark 4.1, JDK 17"
             java_version: "17"
             maven_opts: "-Pspark-4.1"
-            scan_impl: "auto"
 
           - name: "Spark 4.2, JDK 17"
             java_version: "17"
             maven_opts: "-Pspark-4.2"
-            scan_impl: "auto"
         suite:
           - name: "fuzz"
             value: |
@@ -399,7 +393,7 @@ jobs:
               org.apache.spark.sql.CometToPrettyStringSuite
               org.apache.spark.sql.CometCollationSuite
       fail-fast: false
-    name: ${{ matrix.profile.name }}/${{ matrix.profile.scan_impl }} [${{ matrix.suite.name }}]
+    name: ${{ matrix.profile.name }} [${{ matrix.suite.name }}]
     runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }}
     container:
       image: amd64/rust
@@ -437,10 +431,9 @@ jobs:
       - name: Java test steps
         uses: ./.github/actions/java-test
         with:
-          artifact_name: ${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}-${{ matrix.profile.scan_impl }}
+          artifact_name: ${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
           suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }}
           maven_opts: ${{ matrix.profile.maven_opts }}
-          scan_impl: ${{ matrix.profile.scan_impl }}
           upload-test-reports: true
           skip-native-build: true
 
 
@@ -132,16 +132,14 @@ jobs:
           - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
           - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
           - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
-        # Since 4f5eaf0, auto mode uses native_datafusion for V1 scans,
-        # so we only need to test with auto.
         config:
-          - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto'}
-          - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'}
-          - {spark-short: '4.0', spark-full: '4.0.2', java: 17, scan-impl: 'auto'}
-          - {spark-short: '4.0', spark-full: '4.0.2', java: 21, scan-impl: 'auto'}
-          - {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto'}
+          - {spark-short: '3.4', spark-full: '3.4.3', java: 11}
+          - {spark-short: '3.5', spark-full: '3.5.8', java: 11}
+          - {spark-short: '4.0', spark-full: '4.0.2', java: 17}
+          - {spark-short: '4.0', spark-full: '4.0.2', java: 21}
+          - {spark-short: '4.1', spark-full: '4.1.1', java: 17}
       fail-fast: false
-    name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }}
+    name: spark-sql-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }}
     # Hive tests stay on the standard GitHub-hosted runner: HiveSparkSubmitSuite
     # relies on an Ivy 'local-m2-cache' resolver that the runs-on.com
     # ubuntu24-full-x64 image does not provide, so spark-submit fails there.
@@ -171,7 +169,7 @@ jobs:
         run: |
           cd apache-spark
           rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
-          NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=${{ matrix.config.scan-impl }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
+          NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
             build/sbt -Dsbt.log.noformat=true -mem 6144 ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
           if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
             find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
@@ -190,7 +188,7 @@ jobs:
         if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
         uses: actions/upload-artifact@v7
         with:
-          name: fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }}
+          name: fallback-log-spark-sql-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }}
           path: "**/fallback.log"
 
   merge-fallback-logs:
 
@@ -111,22 +111,23 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithEnvVarOrDefault("ENABLE_COMET_WRITE", false)
 
+  @deprecated
   val SCAN_NATIVE_DATAFUSION = "native_datafusion"
+
+  @deprecated
   val SCAN_NATIVE_ICEBERG_COMPAT = "native_iceberg_compat"
+
+  @deprecated
   val SCAN_AUTO = "auto"
 
+  @deprecated
   val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl")
-    .category(CATEGORY_PARQUET)
-    .doc(
-      "The implementation of Comet's Parquet scan to use. Available scans are " +
-        s"`$SCAN_NATIVE_DATAFUSION`, and `$SCAN_NATIVE_ICEBERG_COMPAT`. " +
-        s"`$SCAN_NATIVE_DATAFUSION` is a fully native implementation, and " +
-        s"`$SCAN_NATIVE_ICEBERG_COMPAT` is a hybrid implementation that supports some " +
-        "additional features, such as row indexes and field ids. " +
-        s"`$SCAN_AUTO` (default) chooses the best available scan based on the scan schema.")
+    .category(CATEGORY_TESTING)
+    .internal()
+    .doc("This configuration option is deprecated and has no effect on Comet behavior.")
     .stringConf
     .transform(_.toLowerCase(Locale.ROOT))
     .checkValues(Set(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO))
     .createWithEnvVarOrDefault("COMET_PARQUET_SCAN_IMPL", SCAN_AUTO)
 
   val COMET_ICEBERG_NATIVE_ENABLED: ConfigEntry[Boolean] =
 
@@ -73,8 +73,7 @@ help contributors find bugs in their area of expertise.
 | `area:ffi`         | Arrow FFI / JNI boundary                  |
 | `area:ci`          | CI/CD, GitHub Actions, build tooling      |
 
-The following pre-existing labels also serve as area indicators: `native_datafusion`,
-`native_iceberg_compat`, `spark 4`, `spark sql tests`.
+The following pre-existing labels also serve as area indicators: `spark 4`, `spark sql tests`.
 
 ## Triage Process
 
@@ -109,9 +108,8 @@ Periodically review open bugs to ensure priorities are still accurate:
    crashes, because crashes are at least visible.
 2. **User-reported over test-only.** A bug hit by a real user on a real workload takes priority
    over one found only in test suites.
-3. **Core path over experimental.** Bugs in the default scan mode (`native_comet`) or widely-used
-   expressions take priority over bugs in experimental features like `native_datafusion` or
-   `native_iceberg_compat`.
+3. **Core path over experimental.** Bugs in widely-used expressions and operators take priority over
+   bugs in experimental features.
 4. **Production safety over feature completeness.** Fixing a data corruption bug is more important
    than adding support for a new expression.
 
 
@@ -61,11 +61,9 @@ Comet supports most standard storage systems, such as local file system and obje
 
 Apache DataFusion Comet native reader seamlessly scans files from remote HDFS for [supported formats](#supported-spark-data-sources)
 
-### Using experimental native DataFusion reader
+### Building Comet with HDFS support
 
-Unlike to native Comet reader the Datafusion reader fully supports nested types processing. This reader is currently experimental only
-
-To build Comet with native DataFusion reader and remote HDFS support it is required to have a JDK installed
+Building Comet with remote HDFS support requires a JDK to be installed.
 
 Example:
-Build a Comet for `spark-4.1` provide a JDK path in `JAVA_HOME`
+To build Comet for `spark-4.1`, provide a JDK path in `JAVA_HOME`:
@@ -76,11 +74,10 @@ export JAVA_HOME="/opt/homebrew/opt/openjdk@17"
 make release PROFILES="-Pspark-4.1" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server"
 ```
 
-Start Comet with experimental reader and HDFS support as [described](installation.md/#run-spark-shell-with-comet-enabled)
+Start Comet with HDFS support as [described](installation.md/#run-spark-shell-with-comet-enabled)
 and add additional parameters
 
 ```shell
---conf spark.comet.scan.impl=native_datafusion \
 --conf spark.hadoop.fs.defaultFS="hdfs://namenode:9000" \
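---conf spark.hadoop.dfs.client.use.datanode.hostname = true \
---conf dfs.client.use.datanode.hostname = true
+--conf spark.hadoop.dfs.client.use.datanode.hostname=true \
+--conf dfs.client.use.datanode.hostname=true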
@@ -158,7 +155,6 @@ JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.1" COM
     withSQLConf(
       CometConf.COMET_ENABLED.key -> "true",
       CometConf.COMET_EXEC_ENABLED.key -> "true",
-      CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION,
       SQLConf.USE_V1_SOURCE_LIST.key -> "parquet",
       "fs.defaultFS" -> "hdfs://namenode:9000",
       "dfs.client.use.datanode.hostname" -> "true") {
@@ -169,7 +165,7 @@ JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.1" COM
   }
 ```
 
-Or use `spark-shell` with HDFS support as described [above](#using-experimental-native-datafusion-reader)
+Or use `spark-shell` with HDFS support as described [above](#building-comet-with-hdfs-support)
 
 ## S3
 
 
@@ -183,12 +183,7 @@ case class CometScanRule(session: SparkSession)
           return scanExec
         }
 
-        COMET_NATIVE_SCAN_IMPL.get() match {
-          case SCAN_AUTO | SCAN_NATIVE_DATAFUSION =>
-            nativeDataFusionScan(plan, session, scanExec, r, hadoopConf).getOrElse(scanExec)
-          case SCAN_NATIVE_ICEBERG_COMPAT =>
-            nativeIcebergCompatScan(session, scanExec, r, hadoopConf).getOrElse(scanExec)
-        }
+        nativeDataFusionScan(plan, session, scanExec, r, hadoopConf).getOrElse(scanExec)
 
       case _ =>
         withInfo(scanExec, s"Unsupported relation ${scanExec.relation}")
@@ -242,21 +237,6 @@ case class CometScanRule(session: SparkSession)
     Some(CometScanExec(scanExec, session, SCAN_NATIVE_DATAFUSION))
   }
 
-  private def nativeIcebergCompatScan(
-      session: SparkSession,
-      scanExec: FileSourceScanExec,
-      r: HadoopFsRelation,
-      hadoopConf: Configuration): Option[SparkPlan] = {
-    if (encryptionEnabled(hadoopConf) && !isEncryptionConfigSupported(hadoopConf)) {
-      withInfo(scanExec, s"$SCAN_NATIVE_ICEBERG_COMPAT does not support encryption")
-      return None
-    }
-    if (!isSchemaSupported(scanExec, SCAN_NATIVE_ICEBERG_COMPAT, r)) {
-      return None
-    }
-    Some(CometScanExec(scanExec, session, SCAN_NATIVE_ICEBERG_COMPAT))
-  }
-
   private def transformV2Scan(scanExec: BatchScanExec): SparkPlan = {
 
     scanExec.scan match {
@@ -799,8 +779,7 @@ object CometScanRule extends Logging {
           Native.validateObjectStoreConfig(filePath, objectStoreOptions)
         } catch {
           case e: CometNativeException =>
-            val reason = "Object store config not supported by " +
-              s"$SCAN_NATIVE_ICEBERG_COMPAT: ${e.getMessage}"
+            val reason = s"Object store config not supported: ${e.getMessage}"
             fallbackReasons += reason
             configValidityMap.put(cacheKey, Some(reason))
         }
 
@@ -985,7 +985,7 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
 
   test("size - respect to legacySizeOfNull") {
     val table = "t1"
-    withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_ICEBERG_COMPAT) {
+    withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION) {
       withTable(table) {
         sql(s"create table $table(col array<string>) using parquet")
         sql(s"insert into $table values(null)")
 
@@ -71,7 +71,7 @@ class CometCsvExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper
   test("to_csv - with configurable formatting options") {
     val table = "t1"
     withSQLConf(
-      CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_ICEBERG_COMPAT,
+      CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION,
       CometConf.getExprAllowIncompatConfigKey(classOf[StructsToCsv]) -> "true") {
       withTable(table) {
         val newLinesStr =