
Commit a3e643d

[spark] Format table read filter push down in spark (#6385)

1 parent: 428fab9

12 files changed: 453 additions & 184 deletions
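
In brief, the commit introduces a shared PaimonBasePushDown trait and wires filter push down into format-table scans. A minimal sketch of the query shape that benefits, assuming a Spark session with the Paimon catalog registered as `paimon` and a format table `db.logs` partitioned on `dt` (all names here are illustrative, not from the commit):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()

// With this change, the partition filter below is converted to a Paimon
// predicate and pushed into the format-table scan, so non-matching
// partitions are skipped instead of being filtered after the scan.
spark.sql("SELECT * FROM paimon.db.logs WHERE dt = '2024-01-01'").show()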

Lines changed: 88 additions & 0 deletions

@@ -0,0 +1,88 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.spark

import org.apache.paimon.predicate.{PartitionPredicateVisitor, Predicate}
import org.apache.paimon.types.RowType

import org.apache.spark.sql.connector.read.SupportsPushDownFilters
import org.apache.spark.sql.sources.Filter

import java.util.{List => JList}

import scala.collection.mutable

/** Base trait for Paimon scan filter push down. */
trait PaimonBasePushDown extends SupportsPushDownFilters {

  private var pushedSparkFilters = Array.empty[Filter]

  protected var pushedPaimonPredicates: Array[Predicate] = Array.empty

  protected var reservedFilters: Array[Filter] = Array.empty

  protected var hasPostScanPredicates = false

  protected var partitionKeys: JList[String]
  protected var rowType: RowType

  /**
   * Pushes down filters, and returns filters that need to be evaluated after scanning. <p> Rows
   * should be returned from the data source if and only if all the filters match. That is, filters
   * must be interpreted as ANDed together.
   */
  override def pushFilters(filters: Array[Filter]): Array[Filter] = {
    val pushable = mutable.ArrayBuffer.empty[(Filter, Predicate)]
    val postScan = mutable.ArrayBuffer.empty[Filter]
    val reserved = mutable.ArrayBuffer.empty[Filter]

    val converter = new SparkFilterConverter(rowType)
    val visitor = new PartitionPredicateVisitor(partitionKeys)
    filters.foreach {
      filter =>
        val predicate = converter.convertIgnoreFailure(filter)
        if (predicate == null) {
          postScan.append(filter)
        } else {
          pushable.append((filter, predicate))
          if (predicate.visit(visitor)) {
            reserved.append(filter)
          } else {
            postScan.append(filter)
          }
        }
    }

    if (pushable.nonEmpty) {
      this.pushedSparkFilters = pushable.map(_._1).toArray
      this.pushedPaimonPredicates = pushable.map(_._2).toArray
    }
    if (reserved.nonEmpty) {
      this.reservedFilters = reserved.toArray
    }
    if (postScan.nonEmpty) {
      this.hasPostScanPredicates = true
    }
    postScan.toArray
  }

  override def pushedFilters(): Array[Filter] = {
    pushedSparkFilters
  }
}
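
To make the pushFilters contract above concrete, here is a small hedged sketch of how a filter set splits for a table partitioned on a column `dt` (the column names and values are illustrative, not from the commit):

import org.apache.spark.sql.sources.{EqualTo, Filter}

// For a builder whose partitionKeys == ["dt"], both filters below convert to
// Paimon predicates, so both are pushed (and appear in pushedFilters()):
val filters: Array[Filter] = Array(
  EqualTo("dt", "2024-01-01"), // partition-only: also "reserved", Spark can trust the scan output
  EqualTo("x", 1)              // data column: pushed for data skipping, but returned for post-scan evaluation
)
// pushFilters(filters) would therefore return Array(EqualTo("x", 1)).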
Lines changed: 65 additions & 0 deletions

@@ -0,0 +1,65 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.spark

import org.apache.paimon.predicate.Predicate
import org.apache.paimon.table.FormatTable

import org.apache.spark.sql.PaimonUtils.fieldReference
import org.apache.spark.sql.connector.expressions.NamedReference
import org.apache.spark.sql.connector.read.SupportsRuntimeFiltering
import org.apache.spark.sql.sources.{Filter, In}
import org.apache.spark.sql.types.StructType

import scala.collection.JavaConverters._

/** Scan for {@link FormatTable}. */
case class PaimonFormatTableScan(
    table: FormatTable,
    requiredSchema: StructType,
    filters: Seq[Predicate],
    override val pushDownLimit: Option[Int])
  extends PaimonFormatTableBaseScan(table, requiredSchema, filters, pushDownLimit)
  with SupportsRuntimeFiltering
  with ScanHelper {

  override def filterAttributes(): Array[NamedReference] = {
    val requiredFields = readBuilder.readType().getFieldNames.asScala
    table
      .partitionKeys()
      .asScala
      .toArray
      .filter(requiredFields.contains)
      .map(fieldReference)
  }

  override def filter(filters: Array[Filter]): Unit = {
    val converter = new SparkFilterConverter(table.rowType())
    val partitionFilter = filters.flatMap {
      case in @ In(attr, _) if table.partitionKeys().contains(attr) =>
        Some(converter.convert(in))
      case _ => None
    }
    if (partitionFilter.nonEmpty) {
      readBuilder.withFilter(partitionFilter.head)
      // Set inputPartitions to null to trigger fetching the new splits.
      inputPartitions = null
    }
  }
}
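
For context, the filter method above is the hook Spark's runtime filtering (dynamic partition pruning) uses. A hedged sketch of the interaction, with illustrative partition values:

import org.apache.spark.sql.sources.{Filter, In}

// During dynamic partition pruning, Spark may call scan.filter with an In
// filter on the partition column, e.g.:
val runtimeFilters: Array[Filter] = Array(In("dt", Array("2024-01-01", "2024-01-02")))
// The scan converts it to a Paimon predicate via SparkFilterConverter, narrows
// readBuilder to the matching partitions, and resets inputPartitions so that
// the next planInputPartitions() re-plans splits over the pruned set.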
Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.spark

import org.apache.paimon.table.FormatTable
import org.apache.paimon.types.RowType

import org.apache.spark.sql.connector.read.{SupportsPushDownRequiredColumns, SupportsRuntimeFiltering}
import org.apache.spark.sql.types.StructType

import java.util.{List => JList}

/** ScanBuilder for {@link FormatTable}. */
case class PaimonFormatTableScanBuilder(table: FormatTable)
  extends PaimonBasePushDown
  with SupportsPushDownRequiredColumns {

  override protected var partitionKeys: JList[String] = table.partitionKeys()
  override protected var rowType: RowType = table.rowType()
  protected var requiredSchema: StructType = SparkTypeUtils.fromPaimonRowType(rowType)

  override def build() = PaimonFormatTableScan(table, requiredSchema, pushedPaimonPredicates, None)

  override def pruneColumns(requiredSchema: StructType): Unit = {
    this.requiredSchema = requiredSchema
  }
}
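
For orientation, a hedged sketch of the order in which Spark drives this builder; the FormatTable instance is assumed to be resolved through the Paimon catalog, and the filter and schema below are illustrative:

import org.apache.paimon.table.FormatTable
import org.apache.spark.sql.connector.read.Scan
import org.apache.spark.sql.sources.EqualTo
import org.apache.spark.sql.types.{LongType, StructField, StructType}

// `formatTable` is assumed to be partitioned on `dt`.
def buildPrunedScan(formatTable: FormatTable): Scan = {
  val builder = PaimonFormatTableScanBuilder(formatTable)
  // The partition-key filter is fully pushed, so no post-scan filters remain.
  val postScan = builder.pushFilters(Array(EqualTo("dt", "2024-01-01")))
  assert(postScan.isEmpty)
  // Spark then prunes the read schema to the referenced columns...
  builder.pruneColumns(StructType(Seq(StructField("id", LongType))))
  // ...and builds the scan, which carries the pushed Paimon predicates.
  builder.build()
}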

paimon-spark/paimon-spark-3.2/src/main/scala/org/apache/paimon/spark/PaimonScan.scala

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@
 package org.apache.paimon.spark
 
 import org.apache.paimon.predicate.{Predicate, TopN}
-import org.apache.paimon.table.{InnerTable, Table}
+import org.apache.paimon.table.InnerTable
 
 import org.apache.spark.sql.PaimonUtils.fieldReference
 import org.apache.spark.sql.connector.expressions.NamedReference
paimon-spark/paimon-spark-3.2/src/main/scala/org/apache/paimon/spark/PaimonScanBuilder.scala

Lines changed: 8 additions & 46 deletions

@@ -18,61 +18,23 @@
 
 package org.apache.paimon.spark
 
-import org.apache.paimon.predicate.{PartitionPredicateVisitor, Predicate}
 import org.apache.paimon.table.InnerTable
+import org.apache.paimon.types.RowType
 
-import org.apache.spark.sql.connector.read.SupportsPushDownFilters
+import org.apache.spark.sql.connector.read.Scan
 import org.apache.spark.sql.sources.Filter
 
-import scala.collection.mutable
+import java.util.{List => JList}
 
 class PaimonScanBuilder(table: InnerTable)
   extends PaimonBaseScanBuilder(table)
-  with SupportsPushDownFilters {
+  with PaimonBasePushDown {
 
   private var pushedSparkFilters = Array.empty[Filter]
 
-  /**
-   * Pushes down filters, and returns filters that need to be evaluated after scanning. <p> Rows
-   * should be returned from the data source if and only if all the filters match. That is, filters
-   * must be interpreted as ANDed together.
-   */
-  override def pushFilters(filters: Array[Filter]): Array[Filter] = {
-    val pushable = mutable.ArrayBuffer.empty[(Filter, Predicate)]
-    val postScan = mutable.ArrayBuffer.empty[Filter]
-    val reserved = mutable.ArrayBuffer.empty[Filter]
-
-    val converter = new SparkFilterConverter(table.rowType)
-    val visitor = new PartitionPredicateVisitor(table.partitionKeys())
-    filters.foreach {
-      filter =>
-        val predicate = converter.convertIgnoreFailure(filter)
-        if (predicate == null) {
-          postScan.append(filter)
-        } else {
-          pushable.append((filter, predicate))
-          if (predicate.visit(visitor)) {
-            reserved.append(filter)
-          } else {
-            postScan.append(filter)
-          }
-        }
-    }
-
-    if (pushable.nonEmpty) {
-      this.pushedSparkFilters = pushable.map(_._1).toArray
-      this.pushedPaimonPredicates = pushable.map(_._2).toArray
-    }
-    if (reserved.nonEmpty) {
-      this.reservedFilters = reserved.toArray
-    }
-    if (postScan.nonEmpty) {
-      this.hasPostScanPredicates = true
-    }
-    postScan.toArray
-  }
-
-  override def pushedFilters(): Array[Filter] = {
-    pushedSparkFilters
+  override protected var partitionKeys: JList[String] = table.partitionKeys()
+  override protected var rowType: RowType = table.rowType()
+  override def build(): Scan = {
+    PaimonScan(table, requiredSchema, pushedPaimonPredicates, reservedFilters, None, pushDownTopN)
   }
 }
Lines changed: 93 additions & 0 deletions

@@ -0,0 +1,93 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.spark

import org.apache.paimon.predicate.{PartitionPredicateVisitor, Predicate}
import org.apache.paimon.types.RowType

import org.apache.spark.sql.PaimonUtils
import org.apache.spark.sql.connector.expressions.filter.{Predicate => SparkPredicate}
import org.apache.spark.sql.connector.read.{SupportsPushDownLimit, SupportsPushDownRequiredColumns, SupportsPushDownV2Filters}
import org.apache.spark.sql.sources.Filter

import java.util.{List => JList}

import scala.collection.mutable

/** Base trait for Paimon scan push down. */
trait PaimonBasePushDown extends SupportsPushDownV2Filters with SupportsPushDownLimit {
  private var pushedSparkPredicates = Array.empty[SparkPredicate]

  protected var pushedPaimonPredicates: Array[Predicate] = Array.empty

  protected var reservedFilters: Array[Filter] = Array.empty

  protected var hasPostScanPredicates = false

  protected var pushDownLimit: Option[Int] = None
  protected var partitionKeys: JList[String]
  protected var rowType: RowType

  override def pushPredicates(predicates: Array[SparkPredicate]): Array[SparkPredicate] = {
    val pushable = mutable.ArrayBuffer.empty[(SparkPredicate, Predicate)]
    val postScan = mutable.ArrayBuffer.empty[SparkPredicate]
    val reserved = mutable.ArrayBuffer.empty[Filter]

    val converter = SparkV2FilterConverter(rowType)
    val visitor = new PartitionPredicateVisitor(partitionKeys)
    predicates.foreach {
      predicate =>
        converter.convert(predicate) match {
          case Some(paimonPredicate) =>
            pushable.append((predicate, paimonPredicate))
            if (paimonPredicate.visit(visitor)) {
              // We need to filter the stats using a V1 Filter instead of a V2 predicate.
              reserved.append(PaimonUtils.filterV2ToV1(predicate).get)
            } else {
              postScan.append(predicate)
            }
          case None =>
            postScan.append(predicate)
        }
    }

    if (pushable.nonEmpty) {
      this.pushedSparkPredicates = pushable.map(_._1).toArray
      this.pushedPaimonPredicates = pushable.map(_._2).toArray
    }
    if (reserved.nonEmpty) {
      this.reservedFilters = reserved.toArray
    }
    if (postScan.nonEmpty) {
      this.hasPostScanPredicates = true
    }
    postScan.toArray
  }

  override def pushedPredicates: Array[SparkPredicate] = {
    pushedSparkPredicates
  }

  override def pushLimit(limit: Int): Boolean = {
    // This is safe: we do nothing with the limit if it is a primary key table and the split is not `rawConvertible`.
    pushDownLimit = Some(limit)
    // Just make a best effort to push down the limit; returning false keeps Spark's own LIMIT in place.
    false
  }
}
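
As a usage note for the pushLimit contract above, a short hedged sketch (the catalog name `paimon` and table `db.logs` are illustrative assumptions):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()

// The scan receives pushLimit(10) and may stop planning splits early where
// that is safe; since pushLimit returned false, Spark also applies LIMIT 10
// itself, so correctness never depends on the source honoring the limit.
spark.sql("SELECT * FROM paimon.db.logs LIMIT 10").show()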

paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonBaseScanBuilder.scala

Lines changed: 0 additions & 18 deletions

@@ -33,26 +33,8 @@ abstract class PaimonBaseScanBuilder(table: InnerTable)
 
   protected var requiredSchema: StructType = SparkTypeUtils.fromPaimonRowType(table.rowType())
 
-  protected var pushedPaimonPredicates: Array[Predicate] = Array.empty
-
-  protected var reservedFilters: Array[Filter] = Array.empty
-
-  protected var hasPostScanPredicates = false
-
-  protected var pushDownLimit: Option[Int] = None
-
   protected var pushDownTopN: Option[TopN] = None
 
-  override def build(): Scan = {
-    PaimonScan(
-      table,
-      requiredSchema,
-      pushedPaimonPredicates,
-      reservedFilters,
-      pushDownLimit,
-      pushDownTopN)
-  }
-
   override def pruneColumns(requiredSchema: StructType): Unit = {
     this.requiredSchema = requiredSchema
   }