|
34 | 34 | import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY; |
35 | 35 | import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION; |
36 | 36 | import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL; |
| 37 | +import static org.apache.iceberg.TableProperties.PARQUET_SHRED_VARIANTS; |
37 | 38 | import static org.apache.iceberg.TableProperties.UPDATE_DISTRIBUTION_MODE; |
38 | 39 | import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE; |
39 | 40 | import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_HASH; |
|
61 | 62 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; |
62 | 63 | import org.apache.iceberg.relocated.com.google.common.collect.Lists; |
63 | 64 | import org.apache.spark.sql.internal.SQLConf; |
| 65 | +import org.apache.spark.sql.util.CaseInsensitiveStringMap; |
64 | 66 | import org.junit.jupiter.api.AfterEach; |
65 | 67 | import org.junit.jupiter.api.BeforeEach; |
66 | 68 | import org.junit.jupiter.api.TestTemplate; |
@@ -340,6 +342,8 @@ public void testSparkConfOverride() { |
340 | 342 | TableProperties.DELETE_PARQUET_COMPRESSION, |
341 | 343 | "snappy"), |
342 | 344 | ImmutableMap.of( |
| 345 | + PARQUET_SHRED_VARIANTS, |
| 346 | + "false", |
343 | 347 | DELETE_PARQUET_COMPRESSION, |
344 | 348 | "zstd", |
345 | 349 | PARQUET_COMPRESSION, |
@@ -461,6 +465,8 @@ public void testDataPropsDefaultsAsDeleteProps() { |
461 | 465 | PARQUET_COMPRESSION_LEVEL, |
462 | 466 | "5"), |
463 | 467 | ImmutableMap.of( |
| 468 | + PARQUET_SHRED_VARIANTS, |
| 469 | + "false", |
464 | 470 | DELETE_PARQUET_COMPRESSION, |
465 | 471 | "zstd", |
466 | 472 | PARQUET_COMPRESSION, |
@@ -532,6 +538,8 @@ public void testDeleteFileWriteConf() { |
532 | 538 | DELETE_PARQUET_COMPRESSION_LEVEL, |
533 | 539 | "6"), |
534 | 540 | ImmutableMap.of( |
| 541 | + PARQUET_SHRED_VARIANTS, |
| 542 | + "false", |
535 | 543 | DELETE_PARQUET_COMPRESSION, |
536 | 544 | "zstd", |
537 | 545 | PARQUET_COMPRESSION, |
@@ -686,4 +694,81 @@ private void checkMode(DistributionMode expectedMode, SparkWriteConf writeConf) |
686 | 694 | assertThat(writeConf.copyOnWriteDistributionMode(MERGE)).isEqualTo(expectedMode); |
687 | 695 | assertThat(writeConf.positionDeltaDistributionMode(MERGE)).isEqualTo(expectedMode); |
688 | 696 | } |
| 697 | + |
| 698 | + @TestTemplate |
| 699 | + public void testShredVariantsDefault() { |
| 700 | + Table table = validationCatalog.loadTable(tableIdent); |
| 701 | + SparkWriteConf writeConf = new SparkWriteConf(spark, table, ImmutableMap.of()); |
| 702 | + assertThat(writeConf.shredVariants()).isFalse(); |
| 703 | + } |
| 704 | + |
| 705 | + @TestTemplate |
| 706 | + public void testVariantInferenceBufferSizeDefault() { |
| 707 | + Table table = validationCatalog.loadTable(tableIdent); |
| 708 | + SparkWriteConf writeConf = new SparkWriteConf(spark, table, ImmutableMap.of()); |
| 709 | + assertThat(writeConf.variantInferenceBufferSize()) |
| 710 | + .isEqualTo(TableProperties.PARQUET_VARIANT_BUFFER_SIZE_DEFAULT); |
| 711 | + } |
| 712 | + |
| 713 | + @TestTemplate |
| 714 | + public void testVariantInferenceBufferSizeTableProperty() { |
| 715 | + Table table = validationCatalog.loadTable(tableIdent); |
| 716 | + |
| 717 | + table.updateProperties().set(TableProperties.PARQUET_VARIANT_BUFFER_SIZE, "500").commit(); |
| 718 | + |
| 719 | + SparkWriteConf writeConf = new SparkWriteConf(spark, table, ImmutableMap.of()); |
| 720 | + assertThat(writeConf.variantInferenceBufferSize()).isEqualTo(500); |
| 721 | + } |
| 722 | + |
| 723 | + @TestTemplate |
| 724 | + public void testShredVariantsSessionOverridesTableProperty() { |
| 725 | + Table table = validationCatalog.loadTable(tableIdent); |
| 726 | + table.updateProperties().set(TableProperties.PARQUET_SHRED_VARIANTS, "false").commit(); |
| 727 | + |
| 728 | + withSQLConf( |
| 729 | + ImmutableMap.of(SparkSQLProperties.SHRED_VARIANTS, "true"), |
| 730 | + () -> { |
| 731 | + SparkWriteConf writeConf = new SparkWriteConf(spark, table, ImmutableMap.of()); |
| 732 | + assertThat(writeConf.shredVariants()).isTrue(); |
| 733 | + }); |
| 734 | + } |
| 735 | + |
| 736 | + @TestTemplate |
| 737 | + public void testShredVariantsWriteOptionOverridesSessionConf() { |
| 738 | + withSQLConf( |
| 739 | + ImmutableMap.of(SparkSQLProperties.SHRED_VARIANTS, "false"), |
| 740 | + () -> { |
| 741 | + Table table = validationCatalog.loadTable(tableIdent); |
| 742 | + SparkWriteConf writeConf = |
| 743 | + new SparkWriteConf( |
| 744 | + spark, |
| 745 | + table, |
| 746 | + new CaseInsensitiveStringMap( |
| 747 | + ImmutableMap.of(SparkWriteOptions.SHRED_VARIANTS, "true"))); |
| 748 | + assertThat(writeConf.shredVariants()).isTrue(); |
| 749 | + }); |
| 750 | + } |
| 751 | + |
| 752 | + @TestTemplate |
| 753 | + public void testVariantInferenceBufferSizeSessionConf() { |
| 754 | + withSQLConf( |
| 755 | + ImmutableMap.of(SparkSQLProperties.VARIANT_INFERENCE_BUFFER_SIZE, "250"), |
| 756 | + () -> { |
| 757 | + Table table = validationCatalog.loadTable(tableIdent); |
| 758 | + SparkWriteConf writeConf = new SparkWriteConf(spark, table, ImmutableMap.of()); |
| 759 | + assertThat(writeConf.variantInferenceBufferSize()).isEqualTo(250); |
| 760 | + }); |
| 761 | + } |
| 762 | + |
| 763 | + @TestTemplate |
| 764 | + public void testWritePropertiesIncludeVariantShredding() { |
| 765 | + Table table = validationCatalog.loadTable(tableIdent); |
| 766 | + table.updateProperties().set(TableProperties.PARQUET_SHRED_VARIANTS, "true").commit(); |
| 767 | + table.updateProperties().set(TableProperties.PARQUET_VARIANT_BUFFER_SIZE, "200").commit(); |
| 768 | + |
| 769 | + SparkWriteConf writeConf = new SparkWriteConf(spark, table, ImmutableMap.of()); |
| 770 | + Map<String, String> writeProperties = writeConf.writeProperties(); |
| 771 | + assertThat(writeProperties).containsEntry(PARQUET_SHRED_VARIANTS, "true"); |
| 772 | + assertThat(writeProperties).containsEntry(TableProperties.PARQUET_VARIANT_BUFFER_SIZE, "200"); |
| 773 | + } |
689 | 774 | } |
0 commit comments