|
27 | 27 |
|
28 | 28 | #include "iceberg/arrow/arrow_fs_file_io_internal.h" |
29 | 29 | #include "iceberg/avro/avro_register.h" |
| 30 | +#include "iceberg/data/equality_delete_writer.h" |
30 | 31 | #include "iceberg/file_format.h" |
31 | 32 | #include "iceberg/manifest/manifest_entry.h" |
32 | 33 | #include "iceberg/parquet/parquet_register.h" |
@@ -264,4 +265,138 @@ TEST_F(DataWriterTest, WriteMultipleBatches) { |
264 | 265 | EXPECT_GT(data_file->file_size_in_bytes, 0); |
265 | 266 | } |
266 | 267 |
|
| 268 | +class EqualityDeleteWriterTest : public DataWriterTest { |
| 269 | + protected: |
| 270 | + EqualityDeleteWriterOptions MakeDeleteOptions( |
| 271 | + std::vector<int32_t> equality_field_ids = {1, 2}, |
| 272 | + std::optional<int32_t> sort_order_id = std::nullopt) { |
| 273 | + return EqualityDeleteWriterOptions{ |
| 274 | + .path = "test_eq_deletes.parquet", |
| 275 | + .schema = schema_, |
| 276 | + .spec = partition_spec_, |
| 277 | + .partition = PartitionValues{}, |
| 278 | + .format = FileFormatType::kParquet, |
| 279 | + .io = file_io_, |
| 280 | + .equality_field_ids = std::move(equality_field_ids), |
| 281 | + .sort_order_id = sort_order_id, |
| 282 | + .properties = {{"write.parquet.compression-codec", "uncompressed"}}, |
| 283 | + }; |
| 284 | + } |
| 285 | + |
| 286 | + void WriteTestDataToEqualityWriter(EqualityDeleteWriter* writer) { |
| 287 | + auto test_data = CreateTestData(); |
| 288 | + ArrowArray arrow_array; |
| 289 | + ASSERT_TRUE(::arrow::ExportArray(*test_data, &arrow_array).ok()); |
| 290 | + ASSERT_THAT(writer->Write(&arrow_array), IsOk()); |
| 291 | + } |
| 292 | +}; |
| 293 | + |
| 294 | +TEST_F(EqualityDeleteWriterTest, WriteAndClose) { |
| 295 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 296 | + ASSERT_THAT(writer_result, IsOk()); |
| 297 | + auto writer = std::move(writer_result.value()); |
| 298 | + |
| 299 | + WriteTestDataToEqualityWriter(writer.get()); |
| 300 | + |
| 301 | + auto length_result = writer->Length(); |
| 302 | + ASSERT_THAT(length_result, IsOk()); |
| 303 | + EXPECT_GT(length_result.value(), 0); |
| 304 | + |
| 305 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 306 | +} |
| 307 | + |
| 308 | +TEST_F(EqualityDeleteWriterTest, MetadataAfterClose) { |
| 309 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 310 | + ASSERT_THAT(writer_result, IsOk()); |
| 311 | + auto writer = std::move(writer_result.value()); |
| 312 | + |
| 313 | + WriteTestDataToEqualityWriter(writer.get()); |
| 314 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 315 | + |
| 316 | + auto metadata_result = writer->Metadata(); |
| 317 | + ASSERT_THAT(metadata_result, IsOk()); |
| 318 | + |
| 319 | + const auto& write_result = metadata_result.value(); |
| 320 | + ASSERT_EQ(write_result.data_files.size(), 1); |
| 321 | + |
| 322 | + const auto& data_file = write_result.data_files[0]; |
| 323 | + EXPECT_EQ(data_file->content, DataFile::Content::kEqualityDeletes); |
| 324 | + EXPECT_EQ(data_file->file_path, "test_eq_deletes.parquet"); |
| 325 | + EXPECT_EQ(data_file->file_format, FileFormatType::kParquet); |
| 326 | + EXPECT_GT(data_file->file_size_in_bytes, 0); |
| 327 | + |
| 328 | + // Equality field ids must be set |
| 329 | + ASSERT_EQ(data_file->equality_ids.size(), 2); |
| 330 | + EXPECT_EQ(data_file->equality_ids[0], 1); |
| 331 | + EXPECT_EQ(data_file->equality_ids[1], 2); |
| 332 | +} |
| 333 | + |
| 334 | +TEST_F(EqualityDeleteWriterTest, MetadataBeforeCloseReturnsError) { |
| 335 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 336 | + ASSERT_THAT(writer_result, IsOk()); |
| 337 | + auto writer = std::move(writer_result.value()); |
| 338 | + |
| 339 | + auto metadata_result = writer->Metadata(); |
| 340 | + ASSERT_THAT(metadata_result, IsError(ErrorKind::kValidationFailed)); |
| 341 | + EXPECT_THAT(metadata_result, |
| 342 | + HasErrorMessage("Cannot get metadata before closing the writer")); |
| 343 | +} |
| 344 | + |
| 345 | +TEST_F(EqualityDeleteWriterTest, CloseIsIdempotent) { |
| 346 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 347 | + ASSERT_THAT(writer_result, IsOk()); |
| 348 | + auto writer = std::move(writer_result.value()); |
| 349 | + |
| 350 | + WriteTestDataToEqualityWriter(writer.get()); |
| 351 | + |
| 352 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 353 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 354 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 355 | +} |
| 356 | + |
| 357 | +TEST_F(EqualityDeleteWriterTest, SortOrderIdInMetadata) { |
| 358 | + const int32_t sort_order_id = 7; |
| 359 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions({1}, sort_order_id)); |
| 360 | + ASSERT_THAT(writer_result, IsOk()); |
| 361 | + auto writer = std::move(writer_result.value()); |
| 362 | + |
| 363 | + WriteTestDataToEqualityWriter(writer.get()); |
| 364 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 365 | + |
| 366 | + auto metadata_result = writer->Metadata(); |
| 367 | + ASSERT_THAT(metadata_result, IsOk()); |
| 368 | + const auto& data_file = metadata_result.value().data_files[0]; |
| 369 | + ASSERT_TRUE(data_file->sort_order_id.has_value()); |
| 370 | + EXPECT_EQ(data_file->sort_order_id.value(), sort_order_id); |
| 371 | +} |
| 372 | + |
| 373 | +TEST_F(EqualityDeleteWriterTest, EqualityFieldIdsAccessor) { |
| 374 | + std::vector<int32_t> field_ids = {1, 2, 3}; |
| 375 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions(field_ids)); |
| 376 | + ASSERT_THAT(writer_result, IsOk()); |
| 377 | + auto writer = std::move(writer_result.value()); |
| 378 | + |
| 379 | + auto ids = writer->equality_field_ids(); |
| 380 | + ASSERT_EQ(ids.size(), 3); |
| 381 | + EXPECT_EQ(ids[0], 1); |
| 382 | + EXPECT_EQ(ids[1], 2); |
| 383 | + EXPECT_EQ(ids[2], 3); |
| 384 | +} |
| 385 | + |
| 386 | +TEST_F(EqualityDeleteWriterTest, WriteMultipleBatches) { |
| 387 | + auto writer_result = EqualityDeleteWriter::Make(MakeDeleteOptions()); |
| 388 | + ASSERT_THAT(writer_result, IsOk()); |
| 389 | + auto writer = std::move(writer_result.value()); |
| 390 | + |
| 391 | + WriteTestDataToEqualityWriter(writer.get()); |
| 392 | + WriteTestDataToEqualityWriter(writer.get()); |
| 393 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 394 | + |
| 395 | + auto metadata_result = writer->Metadata(); |
| 396 | + ASSERT_THAT(metadata_result, IsOk()); |
| 397 | + const auto& data_file = metadata_result.value().data_files[0]; |
| 398 | + EXPECT_EQ(data_file->content, DataFile::Content::kEqualityDeletes); |
| 399 | + EXPECT_GT(data_file->file_size_in_bytes, 0); |
| 400 | +} |
| 401 | + |
267 | 402 | } // namespace iceberg |
0 commit comments