|
21 | 21 | import org.apache.paimon.CoreOptions; |
22 | 22 | import org.apache.paimon.compression.CompressOptions; |
23 | 23 | import org.apache.paimon.data.BinaryRow; |
| 24 | +import org.apache.paimon.data.BinaryRowWriter; |
24 | 25 | import org.apache.paimon.data.BinaryString; |
| 26 | +import org.apache.paimon.data.BlobData; |
25 | 27 | import org.apache.paimon.data.GenericRow; |
26 | 28 | import org.apache.paimon.data.InternalRow; |
27 | 29 | import org.apache.paimon.disk.ChannelWithMeta; |
|
38 | 40 | import org.apache.paimon.manifest.FileSource; |
39 | 41 | import org.apache.paimon.memory.HeapMemorySegmentPool; |
40 | 42 | import org.apache.paimon.memory.MemoryPoolFactory; |
| 43 | +import org.apache.paimon.operation.BaseAppendFileStoreWrite; |
41 | 44 | import org.apache.paimon.options.MemorySize; |
42 | 45 | import org.apache.paimon.options.Options; |
| 46 | +import org.apache.paimon.schema.Schema; |
| 47 | +import org.apache.paimon.schema.TableSchema; |
43 | 48 | import org.apache.paimon.stats.SimpleStatsConverter; |
| 49 | +import org.apache.paimon.table.AppendOnlyFileStoreTable; |
| 50 | +import org.apache.paimon.table.FileStoreTableFactory; |
| 51 | +import org.apache.paimon.types.BlobType; |
44 | 52 | import org.apache.paimon.types.DataType; |
45 | 53 | import org.apache.paimon.types.IntType; |
46 | 54 | import org.apache.paimon.types.RowType; |
|
62 | 70 | import java.util.ArrayList; |
63 | 71 | import java.util.Arrays; |
64 | 72 | import java.util.Collections; |
| 73 | +import java.util.HashMap; |
65 | 74 | import java.util.LinkedList; |
66 | 75 | import java.util.List; |
67 | 76 | import java.util.Set; |
@@ -394,6 +403,69 @@ public void testSpillWorksAndMoreSmallFilesGenerated() throws Exception { |
394 | 403 | }); |
395 | 404 | } |
396 | 405 |
|
| 406 | + @Test |
| 407 | + public void testNoSpillWhenMeetBlobType() throws Exception { |
| 408 | + // Create a schema with BLOB type |
| 409 | + RowType blobSchema = |
| 410 | + RowType.builder() |
| 411 | + .fields( |
| 412 | + new DataType[] {new IntType(), new VarCharType(), new BlobType()}, |
| 413 | + new String[] {"id", "name", "data"}) |
| 414 | + .build(); |
| 415 | + |
| 416 | + AppendOnlyFileStoreTable table = |
| 417 | + (AppendOnlyFileStoreTable) |
| 418 | + FileStoreTableFactory.create( |
| 419 | + LocalFileIO.create(), |
| 420 | + pathFactory.newPath(), |
| 421 | + TableSchema.create( |
| 422 | + 0, |
| 423 | + new Schema( |
| 424 | + blobSchema.getFields(), |
| 425 | + Collections.singletonList("id"), |
| 426 | + Collections.emptyList(), |
| 427 | + new HashMap<String, String>() { |
| 428 | + { |
| 429 | + put( |
| 430 | + CoreOptions.DATA_EVOLUTION_ENABLED |
| 431 | + .key(), |
| 432 | + "true"); |
| 433 | + put( |
| 434 | + CoreOptions.ROW_TRACKING_ENABLED |
| 435 | + .key(), |
| 436 | + "true"); |
| 437 | + } |
| 438 | + }, |
| 439 | + ""))); |
| 440 | + BaseAppendFileStoreWrite writer = table.store().newWrite("test"); |
| 441 | + writer.withIOManager(IOManager.create(tempDir.toString())); |
| 442 | + writer.withMemoryPoolFactory( |
| 443 | + new MemoryPoolFactory(new HeapMemorySegmentPool(16384L, 1024))); |
| 444 | + |
| 445 | + char[] largeString = new char[990]; |
| 446 | + Arrays.fill(largeString, 'a'); |
| 447 | + byte[] largeBlobData = new byte[1024]; |
| 448 | + Arrays.fill(largeBlobData, (byte) 'b'); |
| 449 | + |
| 450 | + BinaryRow binaryRow = new BinaryRow(1); |
| 451 | + BinaryRowWriter binaryRowWriter = new BinaryRowWriter(binaryRow); |
| 452 | + for (int j = 0; j < 100; j++) { |
| 453 | + binaryRowWriter.reset(); |
| 454 | + binaryRowWriter.writeInt(0, j); |
| 455 | + binaryRowWriter.complete(); |
| 456 | + writer.write( |
| 457 | + binaryRow, 0, createBlobRow(j, String.valueOf(largeString), largeBlobData)); |
| 458 | + } |
| 459 | + |
| 460 | + binaryRowWriter.reset(); |
| 461 | + binaryRowWriter.writeInt(0, 1000); |
| 462 | + binaryRowWriter.complete(); |
| 463 | + AppendOnlyWriter appendOnlyWriter = (AppendOnlyWriter) writer.createWriter(binaryRow, 0); |
| 464 | + RowBuffer buffer = appendOnlyWriter.getWriteBuffer(); |
| 465 | + assertThat(buffer).isNull(); |
| 466 | + writer.close(); |
| 467 | + } |
| 468 | + |
397 | 469 | @Test |
398 | 470 | public void testNoBuffer() throws Exception { |
399 | 471 | AppendOnlyWriter writer = createEmptyWriter(Long.MAX_VALUE); |
@@ -686,4 +758,56 @@ private DataFileMeta generateCompactAfter(List<DataFileMeta> toCompact) throws I |
686 | 758 | null, |
687 | 759 | null); |
688 | 760 | } |
| 761 | + |
| 762 | + private InternalRow createBlobRow(int id, String name, byte[] blobData) { |
| 763 | + return GenericRow.of(id, BinaryString.fromString(name), new BlobData(blobData)); |
| 764 | + } |
| 765 | + |
| 766 | + private AppendOnlyWriter createWriterWithBlobSchema( |
| 767 | + RowType schema, long targetFileSize, boolean spillable) { |
| 768 | + FileFormat fileFormat = FileFormat.fromIdentifier(AVRO, new Options()); |
| 769 | + LinkedList<DataFileMeta> toCompact = new LinkedList<>(); |
| 770 | + BucketedAppendCompactManager compactManager = |
| 771 | + new BucketedAppendCompactManager( |
| 772 | + Executors.newSingleThreadScheduledExecutor( |
| 773 | + new ExecutorThreadFactory("compaction-thread")), |
| 774 | + toCompact, |
| 775 | + null, |
| 776 | + MIN_FILE_NUM, |
| 777 | + targetFileSize, |
| 778 | + false, |
| 779 | + compactBefore -> Collections.emptyList(), |
| 780 | + null); |
| 781 | + CoreOptions options = |
| 782 | + new CoreOptions(Collections.singletonMap("metadata.stats-mode", "truncate(16)")); |
| 783 | + AppendOnlyWriter writer = |
| 784 | + new AppendOnlyWriter( |
| 785 | + LocalFileIO.create(), |
| 786 | + IOManager.create(tempDir.toString()), |
| 787 | + SCHEMA_ID, |
| 788 | + fileFormat, |
| 789 | + targetFileSize, |
| 790 | + schema, |
| 791 | + null, |
| 792 | + getMaxSequenceNumber(toCompact), |
| 793 | + compactManager, |
| 794 | + files -> { |
| 795 | + throw new RuntimeException("Can't read back in blob mode"); |
| 796 | + }, |
| 797 | + false, |
| 798 | + pathFactory, |
| 799 | + null, |
| 800 | + false, |
| 801 | + spillable, |
| 802 | + CoreOptions.FILE_COMPRESSION.defaultValue(), |
| 803 | + CompressOptions.defaultOptions(), |
| 804 | + new StatsCollectorFactories(options), |
| 805 | + MemorySize.MAX_VALUE, |
| 806 | + new FileIndexOptions(), |
| 807 | + true, |
| 808 | + false); |
| 809 | + writer.setMemoryPool( |
| 810 | + new HeapMemorySegmentPool(options.writeBufferSize(), options.pageSize())); |
| 811 | + return writer; |
| 812 | + } |
689 | 813 | } |
0 commit comments