|
27 | 27 |
|
28 | 28 | #include "iceberg/arrow/arrow_fs_file_io_internal.h" |
29 | 29 | #include "iceberg/avro/avro_register.h" |
| 30 | +#include "iceberg/data/equality_delete_writer.h" |
30 | 31 | #include "iceberg/data/position_delete_writer.h" |
31 | 32 | #include "iceberg/file_format.h" |
32 | 33 | #include "iceberg/manifest/manifest_entry.h" |
@@ -423,4 +424,142 @@ TEST_F(PositionDeleteWriterTest, AutoFlushOnThreshold) { |
423 | 424 | EXPECT_GT(data_file->file_size_in_bytes, 0); |
424 | 425 | } |
425 | 426 |
|
| 427 | +class EqualityDeleteWriterTest : public DataWriterTest { |
| 428 | + protected: |
| 429 | + EqualityDeleteWriterOptions MakeDeleteOptions( |
| 430 | + std::vector<int32_t> equality_field_ids = {1, 2}, |
| 431 | + std::optional<int32_t> sort_order_id = std::nullopt) { |
| 432 | + return EqualityDeleteWriterOptions{ |
| 433 | + .path = "test_eq_deletes.parquet", |
| 434 | + .schema = schema_, |
| 435 | + .spec = partition_spec_, |
| 436 | + .partition = PartitionValues{}, |
| 437 | + .format = FileFormatType::kParquet, |
| 438 | + .io = file_io_, |
| 439 | + .equality_field_ids = std::move(equality_field_ids), |
| 440 | + .sort_order_id = sort_order_id, |
| 441 | + .properties = {{"write.parquet.compression-codec", "uncompressed"}}, |
| 442 | + }; |
| 443 | + } |
| 444 | + |
| 445 | + void WriteTestDataToEqualityWriter(EqualityDeleteWriter* writer) { |
| 446 | + auto test_data = CreateTestData(); |
| 447 | + ArrowArray arrow_array; |
| 448 | + ASSERT_TRUE(::arrow::ExportArray(*test_data, &arrow_array).ok()); |
| 449 | + ASSERT_THAT(writer->Write(&arrow_array), IsOk()); |
| 450 | + } |
| 451 | +}; |
| 452 | + |
| 453 | +TEST_F(EqualityDeleteWriterTest, WriteAndClose) { |
| 454 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 455 | + ASSERT_THAT(writer_result, IsOk()); |
| 456 | + auto writer = std::move(writer_result.value()); |
| 457 | + |
| 458 | + WriteTestDataToEqualityWriter(writer.get()); |
| 459 | + |
| 460 | + auto length_result = writer->Length(); |
| 461 | + ASSERT_THAT(length_result, IsOk()); |
| 462 | + EXPECT_GT(length_result.value(), 0); |
| 463 | + |
| 464 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 465 | +} |
| 466 | + |
| 467 | +TEST_F(EqualityDeleteWriterTest, MetadataAfterClose) { |
| 468 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 469 | + ASSERT_THAT(writer_result, IsOk()); |
| 470 | + auto writer = std::move(writer_result.value()); |
| 471 | + |
| 472 | + WriteTestDataToEqualityWriter(writer.get()); |
| 473 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 474 | + |
| 475 | + auto metadata_result = writer->Metadata(); |
| 476 | + ASSERT_THAT(metadata_result, IsOk()); |
| 477 | + |
| 478 | + const auto& write_result = metadata_result.value(); |
| 479 | + ASSERT_EQ(write_result.data_files.size(), 1); |
| 480 | + |
| 481 | + const auto& data_file = write_result.data_files[0]; |
| 482 | + EXPECT_EQ(data_file->content, DataFile::Content::kEqualityDeletes); |
| 483 | + EXPECT_EQ(data_file->file_path, "test_eq_deletes.parquet"); |
| 484 | + EXPECT_EQ(data_file->file_format, FileFormatType::kParquet); |
| 485 | + EXPECT_GT(data_file->file_size_in_bytes, 0); |
| 486 | + |
| 487 | + // Partition spec id must be set |
| 488 | + ASSERT_TRUE(data_file->partition_spec_id.has_value()); |
| 489 | + EXPECT_EQ(data_file->partition_spec_id.value(), PartitionSpec::kInitialSpecId); |
| 490 | + |
| 491 | + // Equality field ids must be set |
| 492 | + ASSERT_EQ(data_file->equality_ids.size(), 2); |
| 493 | + EXPECT_EQ(data_file->equality_ids[0], 1); |
| 494 | + EXPECT_EQ(data_file->equality_ids[1], 2); |
| 495 | +} |
| 496 | + |
| 497 | +TEST_F(EqualityDeleteWriterTest, MetadataBeforeCloseReturnsError) { |
| 498 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 499 | + ASSERT_THAT(writer_result, IsOk()); |
| 500 | + auto writer = std::move(writer_result.value()); |
| 501 | + |
| 502 | + auto metadata_result = writer->Metadata(); |
| 503 | + ASSERT_THAT(metadata_result, IsError(ErrorKind::kValidationFailed)); |
| 504 | + EXPECT_THAT(metadata_result, |
| 505 | + HasErrorMessage("Cannot get metadata before closing the writer")); |
| 506 | +} |
| 507 | + |
| 508 | +TEST_F(EqualityDeleteWriterTest, CloseIsIdempotent) { |
| 509 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 510 | + ASSERT_THAT(writer_result, IsOk()); |
| 511 | + auto writer = std::move(writer_result.value()); |
| 512 | + |
| 513 | + WriteTestDataToEqualityWriter(writer.get()); |
| 514 | + |
| 515 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 516 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 517 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 518 | +} |
| 519 | + |
| 520 | +TEST_F(EqualityDeleteWriterTest, SortOrderIdInMetadata) { |
| 521 | + const int32_t sort_order_id = 7; |
| 522 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions({1}, sort_order_id)); |
| 523 | + ASSERT_THAT(writer_result, IsOk()); |
| 524 | + auto writer = std::move(writer_result.value()); |
| 525 | + |
| 526 | + WriteTestDataToEqualityWriter(writer.get()); |
| 527 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 528 | + |
| 529 | + auto metadata_result = writer->Metadata(); |
| 530 | + ASSERT_THAT(metadata_result, IsOk()); |
| 531 | + const auto& data_file = metadata_result.value().data_files[0]; |
| 532 | + ASSERT_TRUE(data_file->sort_order_id.has_value()); |
| 533 | + EXPECT_EQ(data_file->sort_order_id.value(), sort_order_id); |
| 534 | +} |
| 535 | + |
| 536 | +TEST_F(EqualityDeleteWriterTest, EqualityFieldIdsAccessor) { |
| 537 | + std::vector<int32_t> field_ids = {1, 2, 3}; |
| 538 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions(field_ids)); |
| 539 | + ASSERT_THAT(writer_result, IsOk()); |
| 540 | + auto writer = std::move(writer_result.value()); |
| 541 | + |
| 542 | + auto ids = writer->equality_field_ids(); |
| 543 | + ASSERT_EQ(ids.size(), 3); |
| 544 | + EXPECT_EQ(ids[0], 1); |
| 545 | + EXPECT_EQ(ids[1], 2); |
| 546 | + EXPECT_EQ(ids[2], 3); |
| 547 | +} |
| 548 | + |
| 549 | +TEST_F(EqualityDeleteWriterTest, WriteMultipleBatches) { |
| 550 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 551 | + ASSERT_THAT(writer_result, IsOk()); |
| 552 | + auto writer = std::move(writer_result.value()); |
| 553 | + |
| 554 | + WriteTestDataToEqualityWriter(writer.get()); |
| 555 | + WriteTestDataToEqualityWriter(writer.get()); |
| 556 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 557 | + |
| 558 | + auto metadata_result = writer->Metadata(); |
| 559 | + ASSERT_THAT(metadata_result, IsOk()); |
| 560 | + const auto& data_file = metadata_result.value().data_files[0]; |
| 561 | + EXPECT_EQ(data_file->content, DataFile::Content::kEqualityDeletes); |
| 562 | + EXPECT_GT(data_file->file_size_in_bytes, 0); |
| 563 | +} |
| 564 | + |
426 | 565 | } // namespace iceberg |
0 commit comments