From 9e7ab2c2004ecb79ebe30133837250f25b7b3ba3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 28 Jan 2026 18:08:26 -0700 Subject: [PATCH 1/3] Add fallback checks in nativeDataFusionScan for unsupported features Add checks for metadata columns, Parquet field ID reads, bucketed scans, and row index generation so that auto mode falls back to native_iceberg_compat. Co-Authored-By: Claude Opus 4.5 --- .../comet/serde/operator/CometNativeScan.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala index b7909b67cb..7c8d4cbf95 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala @@ -39,6 +39,7 @@ import org.apache.comet.serde.{CometOperatorSerde, Compatible, OperatorOuterClas import org.apache.comet.serde.ExprOuterClass.Expr import org.apache.comet.serde.OperatorOuterClass.Operator import org.apache.comet.serde.QueryPlanSerde.{exprToProto, serializeDataType} +import org.apache.comet.shims.ShimFileFormat /** * Validation and serde logic for `native_datafusion` scans. 
@@ -77,6 +78,22 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { withInfo(scanExec, "Full native scan disabled because ignoreMissingFiles enabled") } + if (scanExec.fileConstantMetadataColumns.nonEmpty) { + withInfo(scanExec, "Native DataFusion scan does not support metadata columns") + } + + if (CometParquetUtils.readFieldId(SQLConf.get)) { + withInfo(scanExec, "Native DataFusion scan does not support Parquet field ID based reads") + } + + if (scanExec.bucketedScan) { + withInfo(scanExec, "Native DataFusion scan does not support bucketed scans") + } + + if (ShimFileFormat.findRowIndexColumnIndexInSchema(scanExec.requiredSchema) >= 0) { + withInfo(scanExec, "Native DataFusion scan does not support row index generation") + } + // the scan is supported if no fallback reasons were added to the node !hasExplainInfo(scanExec) } From 788483e107c7c249aac5f71c41718c42ea8d1356 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 28 Jan 2026 18:56:53 -0700 Subject: [PATCH 2/3] Remove Parquet field ID fallback check from native DataFusion scan --- .../org/apache/comet/serde/operator/CometNativeScan.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala index 7c8d4cbf95..49b043840a 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala @@ -82,10 +82,6 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { withInfo(scanExec, "Native DataFusion scan does not support metadata columns") } - if (CometParquetUtils.readFieldId(SQLConf.get)) { - withInfo(scanExec, "Native DataFusion scan does not support Parquet field ID based reads") - } - if (scanExec.bucketedScan) { withInfo(scanExec, "Native DataFusion scan does not support bucketed scans") } From e9ea9b4a582ae812a67a6b1f99ebb108c46840f4 Mon Sep 17 00:00:00 2001 
From: Andy Grove Date: Wed, 4 Feb 2026 09:15:42 -0700 Subject: [PATCH 3/3] Remove bucketed scan fallback for native DataFusion scan Bucketed scans should work with native DataFusion scan without requiring a fallback to Spark. Co-Authored-By: Claude Opus 4.5 --- .../org/apache/comet/serde/operator/CometNativeScan.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala index 49b043840a..6a7a5bd98c 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala @@ -82,10 +82,6 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging { withInfo(scanExec, "Native DataFusion scan does not support metadata columns") } - if (scanExec.bucketedScan) { - withInfo(scanExec, "Native DataFusion scan does not support bucketed scans") - } - if (ShimFileFormat.findRowIndexColumnIndexInSchema(scanExec.requiredSchema) >= 0) { withInfo(scanExec, "Native DataFusion scan does not support row index generation") }