|
| 1 | +package com.bp.sds.cef |
| 2 | + |
| 3 | +import org.apache.hadoop.fs.Path |
| 4 | +import org.apache.spark.sql.SparkSession |
| 5 | +import org.apache.spark.sql.catalyst.expressions.Expression |
| 6 | +import org.apache.spark.sql.connector.read.PartitionReaderFactory |
| 7 | +import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex |
| 8 | +import org.apache.spark.sql.execution.datasources.v2.{FileScan, TextBasedFileScan} |
| 9 | +import org.apache.spark.sql.types.StructType |
| 10 | +import org.apache.spark.sql.util.CaseInsensitiveStringMap |
| 11 | +import org.apache.spark.util.SerializableConfiguration |
| 12 | + |
| 13 | +import scala.collection.JavaConverters.mapAsScalaMapConverter |
| 14 | + |
/**
 * DataSource V2 file scan for CEF text files.
 *
 * Extends Spark's [[TextBasedFileScan]] and produces a [[CefPartitionReaderFactory]]
 * configured with the session SQL conf, a broadcast Hadoop configuration, the
 * schemas below and the parsed CEF options.
 *
 * @param sparkSession        session used to build the Hadoop configuration and to broadcast it
 * @param fileIndex           index of the files covered by this scan
 * @param dataSchema          schema forwarded to the reader factory as the data schema
 * @param readDataSchema      schema forwarded to the reader factory as the data columns to read
 * @param readPartitionSchema schema forwarded to the reader factory as the partition columns to read
 * @param options             user supplied options; parsed into `CefParserOptions` and also
 *                            applied on top of the Hadoop configuration
 * @param partitionFilters    partition filters attached to this scan (see `withFilters`)
 * @param dataFilters         data filters attached to this scan (see `withFilters`)
 */
private[cef] case class CefScan(
    sparkSession: SparkSession,
    fileIndex: PartitioningAwareFileIndex,
    dataSchema: StructType,
    readDataSchema: StructType,
    readPartitionSchema: StructType,
    options: CaseInsensitiveStringMap,
    partitionFilters: Seq[Expression] = Seq.empty,
    dataFilters: Seq[Expression] = Seq.empty
) extends TextBasedFileScan(sparkSession, options) {

  // CEF specific parser settings derived from the user options; parsed once per scan instance.
  private val cefOptions = CefParserOptions.from(options)

  /** Defers entirely to [[TextBasedFileScan]]; no CEF specific restriction is added here. */
  override def isSplitable(path: Path): Boolean = super.isSplitable(path)

  /** Returns a copy of this scan with the given partition and data filters attached. */
  override def withFilters(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): FileScan =
    this.copy(partitionFilters = partitionFilters, dataFilters = dataFilters)

  /**
   * Builds the reader factory that is handed to the readers of this scan.
   *
   * @return a [[CefPartitionReaderFactory]] carrying the session SQL conf, a broadcast
   *         Hadoop configuration, the scan schemas and the parsed CEF options
   */
  override def createReaderFactory(): PartitionReaderFactory = {
    // Hadoop configuration keys are case sensitive, while CaseInsensitiveStringMap exposes
    // lower-cased keys through its Map interface. Use the original-case view so that
    // mixed-case Hadoop settings passed as options are applied under their real names.
    val caseSensitiveOptions = options.asCaseSensitiveMap.asScala.toMap
    val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveOptions)
    val broadcastConf =
      sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf))

    CefPartitionReaderFactory(
      sparkSession.sessionState.conf,
      broadcastConf,
      dataSchema,
      readDataSchema,
      readPartitionSchema,
      cefOptions
    )
  }
}
0 commit comments