18 | 18 |
19 | 19 | package org.apache.spark.sql.execution |
20 | 20 |
21 | | -import org.apache.paimon.spark.{PaimonFormatTableScanBuilder, SparkTypeUtils} |
| 21 | +import org.apache.paimon.fs.TwoPhaseOutputStream |
| 22 | +import org.apache.paimon.spark.{PaimonFormatTableScanBuilder, SparkInternalRowWrapper, SparkTypeUtils} |
22 | 23 | import org.apache.paimon.table.FormatTable |
| 24 | +import org.apache.paimon.table.format.{FormatBatchWriteBuilder, TwoPhaseCommitMessage} |
| 25 | +import org.apache.paimon.table.sink.BatchTableWrite |
23 | 26 |
24 | 27 | import org.apache.hadoop.fs.Path |
| 28 | +import org.apache.spark.internal.Logging |
25 | 29 | import org.apache.spark.sql.SparkSession |
26 | 30 | import org.apache.spark.sql.catalyst.InternalRow |
27 | 31 | import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} |
28 | 32 | import org.apache.spark.sql.connector.catalog.{SupportsPartitionManagement, SupportsRead, SupportsWrite, TableCapability} |
29 | | -import org.apache.spark.sql.connector.catalog.TableCapability.BATCH_READ |
| 33 | +import org.apache.spark.sql.connector.catalog.TableCapability.{BATCH_READ, BATCH_WRITE} |
30 | 34 | import org.apache.spark.sql.connector.read.ScanBuilder |
31 | | -import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} |
| 35 | +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, Write, WriteBuilder, WriterCommitMessage} |
| 36 | +import org.apache.spark.sql.connector.write.streaming.StreamingWrite |
32 | 37 | import org.apache.spark.sql.execution.datasources._ |
33 | 38 | import org.apache.spark.sql.execution.datasources.v2.csv.{CSVScanBuilder, CSVTable} |
34 | 39 | import org.apache.spark.sql.execution.datasources.v2.json.JsonTable |
@@ -186,15 +191,15 @@ case class PaimonFormatTable( |
186 | 191 | } |
187 | 192 |
188 | 193 | override def capabilities(): util.Set[TableCapability] = { |
189 | | - util.EnumSet.of(BATCH_READ) |
| 194 | + util.EnumSet.of(BATCH_READ, BATCH_WRITE) |
190 | 195 | } |
191 | 196 |
192 | 197 | override def newScanBuilder(caseInsensitiveStringMap: CaseInsensitiveStringMap): ScanBuilder = { |
193 | 198 | PaimonFormatTableScanBuilder(table.copy(caseInsensitiveStringMap), schema, Seq.empty) |
194 | 199 | } |
195 | 200 |
196 | 201 | override def newWriteBuilder(logicalWriteInfo: LogicalWriteInfo): WriteBuilder = { |
197 | | - throw new UnsupportedOperationException() |
| 202 | + PaimonFormatTableWriterBuilder(table, schema) |
198 | 203 | } |
199 | 204 | } |
200 | 205 |
@@ -297,3 +302,140 @@ class PartitionedJsonTable( |
297 | 302 | partitionSchema()) |
298 | 303 | } |
299 | 304 | } |
| 305 | + |
| 306 | +case class PaimonFormatTableWriterBuilder(table: FormatTable, writeSchema: StructType) |
| 307 | + extends WriteBuilder { |
| 308 | + override def build: Write = new Write() { |
| 309 | + override def toBatch: BatchWrite = { |
| 310 | + FormatTableBatchWrite(table, writeSchema) |
| 311 | + } |
| 312 | + |
| 313 | + override def toStreaming: StreamingWrite = { |
| 314 | + throw new UnsupportedOperationException("FormatTable doesn't support streaming write") |
| 315 | + } |
| 316 | + } |
| 317 | +} |
| 318 | + |
| 319 | +private case class FormatTableBatchWrite(table: FormatTable, writeSchema: StructType) |
| 320 | + extends BatchWrite |
| 321 | + with Logging { |
| 322 | + |
| 323 | + override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = |
| 324 | + FormatTableWriterFactory(table, writeSchema) |
| 325 | + |
| 326 | + override def useCommitCoordinator(): Boolean = false |
| 327 | + |
| 328 | + override def commit(messages: Array[WriterCommitMessage]): Unit = { |
| 329 | + logInfo(s"Committing to FormatTable ${table.name()}") |
| 330 | + |
| 331 | + val committers = messages |
| 332 | + .collect { |
| 333 | + case taskCommit: FormatTableTaskCommit => taskCommit.committers() |
| 334 | + case other => |
| 335 | + throw new IllegalArgumentException(s"${other.getClass.getName} is not supported") |
| 336 | + } |
| 337 | + .flatten |
| 338 | + .toSeq |
| 339 | + |
| 340 | + try { |
| 341 | + val start = System.currentTimeMillis() |
| 342 | + committers.foreach(_.commit()) |
| 343 | + logInfo(s"Committed in ${System.currentTimeMillis() - start} ms") |
| 344 | + } catch { |
| 345 | + case e: Exception => |
| 346 | + logError("Failed to commit FormatTable writes", e) |
| 347 | + throw e |
| 348 | + } |
| 349 | + } |
| 350 | + |
| 351 | + override def abort(messages: Array[WriterCommitMessage]): Unit = { |
| 352 | + logInfo(s"Aborting write to FormatTable ${table.name()}") |
| 353 | + val committers = messages.collect { |
| 354 | + case taskCommit: FormatTableTaskCommit => taskCommit.committers() |
| 355 | + }.flatten |
| 356 | + |
| 357 | + committers.foreach { |
| 358 | + committer => |
| 359 | + try { |
| 360 | + committer.discard() |
| 361 | + } catch { |
| 362 | + case e: Exception => logWarning(s"Failed to abort committer: ${e.getMessage}") |
| 363 | + } |
| 364 | + } |
| 365 | + } |
| 366 | +} |
| 367 | + |
| 368 | +private case class FormatTableWriterFactory(table: FormatTable, writeSchema: StructType) |
| 369 | + extends DataWriterFactory { |
| 370 | + |
| 371 | + override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] = { |
| 372 | + val formatTableWrite = table.newBatchWriteBuilder().newWrite() |
| 373 | + new FormatTableDataWriter(table, formatTableWrite, writeSchema) |
| 374 | + } |
| 375 | +} |
| 376 | + |
| 377 | +private class FormatTableDataWriter( |
| 378 | + table: FormatTable, |
| 379 | + formatTableWrite: BatchTableWrite, |
| 380 | + writeSchema: StructType) |
| 381 | + extends DataWriter[InternalRow] |
| 382 | + with Logging { |
| 383 | + |
| 384 | + private val rowConverter: InternalRow => org.apache.paimon.data.InternalRow = { |
| 385 | + val numFields = writeSchema.fields.length |
| 386 | + record => { |
| 387 | + new SparkInternalRowWrapper(-1, writeSchema, numFields).replace(record) |
| 388 | + } |
| 389 | + } |
| 390 | + |
| 391 | + override def write(record: InternalRow): Unit = { |
| 392 | + val paimonRow = rowConverter.apply(record) |
| 393 | + formatTableWrite.write(paimonRow) |
| 394 | + } |
| 395 | + |
| 396 | + override def commit(): WriterCommitMessage = { |
| 397 | + try { |
| 398 | + val committers = formatTableWrite |
| 399 | + .prepareCommit() |
| 400 | + .asScala |
| 401 | + .map { |
| 402 | + case committer: TwoPhaseCommitMessage => committer.getCommitter |
| 403 | + case other => |
| 404 | + throw new IllegalArgumentException( |
| 405 | + "Unsupported commit message type: " + other.getClass.getSimpleName) |
| 406 | + } |
| 407 | + .toSeq |
| 408 | + FormatTableTaskCommit(committers) |
| 409 | + } finally { |
| 410 | + close() |
| 411 | + } |
| 412 | + } |
| 413 | + |
| 414 | + override def abort(): Unit = { |
| 415 | + logInfo("Aborting FormatTable data writer") |
| 416 | + close() |
| 417 | + } |
| 418 | + |
| 419 | + override def close(): Unit = { |
| 420 | + try { |
| 421 | + formatTableWrite.close() |
| 422 | + } catch { |
| 423 | + case e: Exception => |
| 424 | + logError("Error closing FormatTableDataWriter", e) |
| 425 | + throw new RuntimeException(e) |
| 426 | + } |
| 427 | + } |
| 428 | +} |
| 429 | + |
| 430 | +/** Commit message container for FormatTable writes, holding committers that need to be executed. */ |
| 431 | +class FormatTableTaskCommit private (private val _committers: Seq[TwoPhaseOutputStream.Committer]) |
| 432 | + extends WriterCommitMessage { |
| 433 | + |
| 434 | + def committers(): Seq[TwoPhaseOutputStream.Committer] = _committers |
| 435 | +} |
| 436 | + |
| 437 | +object FormatTableTaskCommit { |
| 438 | + def apply(committers: Seq[TwoPhaseOutputStream.Committer]): FormatTableTaskCommit = { |
| 439 | + new FormatTableTaskCommit(committers) |
| 440 | + } |
| 441 | +} |